PyPI - dv-pipecat-ai - Versions diffs - 0.0.82.dev881__py3-none-any.whl → 0.0.85.dev1__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.82.dev881-py3-none-any.whl → 0.0.85.dev1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (26)

{dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/METADATA +2 -1
{dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/RECORD +24 -22
pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
pipecat/audio/vad/silero.py +9 -3
pipecat/frames/frames.py +49 -0
pipecat/pipeline/tts_switcher.py +30 -0
pipecat/processors/aggregators/dtmf_aggregator.py +22 -29
pipecat/processors/aggregators/llm_response.py +2 -0
pipecat/processors/dtmf_aggregator.py +175 -74
pipecat/processors/filters/stt_mute_filter.py +15 -0
pipecat/processors/user_idle_processor.py +32 -5
pipecat/serializers/__init__.py +3 -1
pipecat/serializers/convox.py +40 -3
pipecat/serializers/custom.py +257 -0
pipecat/serializers/plivo.py +4 -1
pipecat/services/elevenlabs/stt.py +18 -8
pipecat/services/sarvam/__init__.py +7 -0
pipecat/services/sarvam/stt.py +540 -0
pipecat/services/sarvam/tts.py +13 -1
pipecat/services/speechmatics/stt.py +16 -0
pipecat/services/vistaar/llm.py +45 -7
pipecat/serializers/genesys.py +0 -95
pipecat/services/google/test-google-chirp.py +0 -45
{dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/WHEEL +0 -0
{dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/licenses/LICENSE +0 -0
{dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/top_level.txt +0 -0

pipecat/services/vistaar/llm.py CHANGED Viewed

@@ -2,10 +2,11 @@
 import asyncio
 import json
+import random
 import time
 import uuid
 from dataclasses import dataclass
-from typing import Any, AsyncGenerator, Dict, Optional
+from typing import Any, AsyncGenerator, Dict, List, Optional
 from urllib.parse import urlencode
 import httpx
@@ -13,6 +14,8 @@ from loguru import logger
 from pydantic import BaseModel, Field
 from pipecat.frames.frames import (
+    EndFrame,
+    CancelFrame,
     Frame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
@@ -53,12 +56,13 @@ class VistaarLLMService(LLMService):
             source_lang: Source language code (e.g., 'mr' for Marathi, 'hi' for Hindi).
             target_lang: Target language code for responses.
             session_id: Session ID for maintaining conversation context.
-            extra: Additional model-specific parameters.
+            extra: Additional model-specific parameters
         """
         source_lang: Optional[str] = Field(default="mr")
         target_lang: Optional[str] = Field(default="mr")
         session_id: Optional[str] = Field(default=None)
+        pre_query_response_phrases: Optional[List[str]] = Field(default_factory=list)
         extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
     def __init__(
@@ -68,7 +72,6 @@ class VistaarLLMService(LLMService):
         params: Optional[InputParams] = None,
         timeout: float = 30.0,
         interim_timeout: float = 5.0,
-        interim_message: str = "एक क्षण थांबा, मी बघतो. ",
         **kwargs,
     ):
         """Initialize Vistaar LLM service.
@@ -77,8 +80,7 @@ class VistaarLLMService(LLMService):
             base_url: The base URL for Vistaar API. Defaults to "https://vistaar.kenpath.ai/api".
             params: Input parameters for model configuration and behavior.
             timeout: Request timeout in seconds. Defaults to 30.0 seconds.
-            interim_timeout: Time in seconds before sending interim message. Defaults to 3.0 seconds.
-            interim_message: Message to send if API takes longer than interim_timeout. Defaults to "एक क्षण थांबा, मी बघतो. ".
+            interim_timeout: Time in seconds before sending interim message. Defaults to 5.0 seconds.
             **kwargs: Additional arguments passed to the parent LLMService.
         """
         super().__init__(**kwargs)
@@ -89,10 +91,10 @@ class VistaarLLMService(LLMService):
         self._source_lang = params.source_lang
         self._target_lang = params.target_lang
         self._session_id = params.session_id or str(uuid.uuid4())
+        self._pre_query_response_phrases = params.pre_query_response_phrases or []
         self._extra = params.extra if isinstance(params.extra, dict) else {}
         self._timeout = timeout
         self._interim_timeout = interim_timeout
-        self._interim_message = interim_message
         # Create an async HTTP client
         self._client = httpx.AsyncClient(timeout=httpx.Timeout(self._timeout), verify=False)
@@ -103,6 +105,8 @@ class VistaarLLMService(LLMService):
         self._partial_response = []  # Track what was actually sent before interruption
         self._interim_sent = False  # Track if interim message was sent
         self._interim_task = None  # Track interim message task
+        self._interim_completion_event = asyncio.Event()  # Track interim message completion
+        self._interim_in_progress = False  # Track if interim message is being spoken
         logger.info(
             f"Vistaar LLM initialized - Base URL: {self._base_url}, Session ID: {self._session_id}, Source Lang: {self._source_lang}, Target Lang: {self._target_lang}, Timeout: {self._timeout}s"
@@ -161,6 +165,10 @@ class VistaarLLMService(LLMService):
         # Set interruption flag
         self._is_interrupted = True
+        # Reset interim state on interruption
+        self._interim_in_progress = False
+        self._interim_completion_event.set()  # Unblock any waiting LLM responses
         # Cancel interim message task if active
         await self._cancel_interim_message_task(
             "Cancelled interim message task - handling interruption"
@@ -193,11 +201,28 @@ class VistaarLLMService(LLMService):
             if not self._is_interrupted and not self._interim_sent:
                 logger.info(f"Sending interim message after {self._interim_timeout}s timeout")
                 self._interim_sent = True
-                await self.push_frame(LLMTextFrame(text=self._interim_message))
+                self._interim_in_progress = True
+                # Use random selection from pre_query_response_phrases if available, otherwise fallback to default
+                if self._pre_query_response_phrases:
+                    message = random.choice(self._pre_query_response_phrases)
+                else:
+                    message = "एक क्षण थांबा, मी बघतो. "
+                await self.push_frame(LLMTextFrame(text=message))
+                # Wait for estimated TTS duration before marking as complete
+                estimated_tts_duration = max(2.0, len(message) * 0.08)  # ~80ms per character
+                logger.info(f"Waiting {estimated_tts_duration:.2f}s for interim TTS completion")
+                await asyncio.sleep(estimated_tts_duration)
         except asyncio.CancelledError:
             logger.debug("Interim message task cancelled")
         except Exception as e:
             logger.error(f"Error sending interim message: {e}")
+        finally:
+            # Signal that interim message handling is complete
+            self._interim_completion_event.set()
+            self._interim_in_progress = False
     async def _stream_response(self, query: str) -> AsyncGenerator[str, None]:
         """Stream response from Vistaar API using Server-Sent Events.
@@ -231,6 +256,8 @@ class VistaarLLMService(LLMService):
         self._is_interrupted = False
         self._partial_response = []
         self._interim_sent = False
+        self._interim_in_progress = False
+        self._interim_completion_event.clear()  # Reset the event for new request
         try:
             # Use httpx to handle SSE streaming
@@ -291,6 +318,7 @@ class VistaarLLMService(LLMService):
             # Start response
             await self.push_frame(LLMFullResponseStartFrame())
+            await self.push_frame(LLMFullResponseStartFrame(), FrameDirection.UPSTREAM)
             await self.start_processing_metrics()
             await self.start_ttfb_metrics()
@@ -307,6 +335,15 @@ class VistaarLLMService(LLMService):
                 if first_chunk:
                     await self.stop_ttfb_metrics()
                     first_chunk = False
+                    # Wait for interim message to complete if it was sent and is in progress
+                    if self._interim_sent:
+                        logger.debug(
+                            "Waiting for interim message completion before sending LLM response"
+                        )
+                        await self._interim_completion_event.wait()
+                        logger.debug("Interim message completed, proceeding with LLM response")
                     # Cancel interim message task since we got first response
                     await self._cancel_interim_message_task(
                         "Cancelled interim message task - got first response"
@@ -334,6 +371,7 @@ class VistaarLLMService(LLMService):
             )
             await self.stop_processing_metrics()
             await self.push_frame(LLMFullResponseEndFrame())
+            await self.push_frame(LLMFullResponseEndFrame(), FrameDirection.UPSTREAM)
     async def process_frame(self, frame: Frame, direction: FrameDirection):
         """Process frames for LLM completion requests.

pipecat/serializers/genesys.py DELETED Viewed

@@ -1,95 +0,0 @@
-import base64
-import json
-from typing import Optional
-from pydantic import BaseModel
-from pipecat.audio.utils import create_default_resampler, pcm_to_ulaw, ulaw_to_pcm
-from pipecat.frames.frames import (
-    AudioRawFrame,
-    Frame,
-    InputAudioRawFrame,
-    InputDTMFFrame,
-    KeypadEntry,
-    StartFrame,
-    StartInterruptionFrame,
-    TransportMessageFrame,
-    TransportMessageUrgentFrame,
-)
-from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
-class GenesysFrameSerializer(FrameSerializer):
-    class InputParams(BaseModel):
-        genesys_sample_rate: int = 8000  # Default Genesys rate (8kHz)
-        sample_rate: Optional[int] = None  # Pipeline input rate
-    def __init__(self, session_id: str, params: InputParams = InputParams()):
-        self._session_id = session_id
-        self._params = params
-        self._genesys_sample_rate = self._params.genesys_sample_rate
-        self._sample_rate = 0  # Pipeline input rate
-        self._resampler = create_default_resampler()
-        self._seq = 1  # Sequence number for outgoing messages
-    @property
-    def type(self) -> FrameSerializerType:
-        return FrameSerializerType.TEXT
-    async def setup(self, frame: StartFrame):
-        self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
-    async def serialize(self, frame: Frame) -> str | bytes | None:
-        if isinstance(frame, StartInterruptionFrame):
-            answer = {
-                "version": "2",
-                "type": "clearAudio",  # Or appropriate event for interruption
-                "seq": self._seq,
-                "id": self._session_id,
-            }
-            self._seq += 1
-            return json.dumps(answer)
-        elif isinstance(frame, AudioRawFrame):
-            data = frame.audio
-            # Convert PCM to 8kHz μ-law for Genesys
-            serialized_data = await pcm_to_ulaw(
-                data, frame.sample_rate, self._genesys_sample_rate, self._resampler
-            )
-            payload = base64.b64encode(serialized_data).decode("utf-8")
-            answer = {
-                "version": "2",
-                "type": "audio",
-                "seq": self._seq,
-                "id": self._session_id,
-                "media": {
-                    "payload": payload,
-                    "format": "PCMU",
-                    "rate": self._genesys_sample_rate,
-                },
-            }
-            self._seq += 1
-            return json.dumps(answer)
-        elif isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
-            return json.dumps(frame.message)
-    async def deserialize(self, data: str | bytes) -> Frame | None:
-        message = json.loads(data)
-        if message.get("type") == "audio":
-            payload_base64 = message["media"]["payload"]
-            payload = base64.b64decode(payload_base64)
-            # Convert Genesys 8kHz μ-law to PCM at pipeline input rate
-            deserialized_data = await ulaw_to_pcm(
-                payload, self._genesys_sample_rate, self._sample_rate, self._resampler
-            )
-            audio_frame = InputAudioRawFrame(
-                audio=deserialized_data, num_channels=1, sample_rate=self._sample_rate
-            )
-            return audio_frame
-        elif message.get("type") == "dtmf":
-            digit = message.get("dtmf", {}).get("digit")
-            try:
-                return InputDTMFFrame(KeypadEntry(digit))
-            except ValueError:
-                return None
-        else:
-            return None

pipecat/services/google/test-google-chirp.py DELETED Viewed

@@ -1,45 +0,0 @@
-import asyncio
-import os
-from pipecat.frames.frames import TTSAudioRawFrame
-from pipecat.services.google.tts import GoogleTTSService
-async def test_chirp_tts():
-    # Get credentials from environment variable
-    credentials_path = (
-        "/Users/kalicharanvemuru/Documents/Code/pipecat/examples/ringg-chatbot/creds.json"
-    )
-    if not credentials_path or not os.path.exists(credentials_path):
-        raise ValueError(
-            "Please set GOOGLE_APPLICATION_CREDENTIALS environment variable to your service account key file"
-        )
-    # Initialize the TTS service with Chirp voice
-    tts = GoogleTTSService(
-        credentials_path=credentials_path,
-        voice_id="en-US-Chirp3-HD-Charon",  # Using Chirp3 HD Charon voice
-        sample_rate=24000,
-    )
-    # Test text
-    test_text = "Hello, this is a test of the Google TTS service with Chirp voice."
-    print(f"Testing TTS with text: {test_text}")
-    # Generate speech
-    try:
-        async for frame in tts.run_tts(test_text):
-            if isinstance(frame, TTSAudioRawFrame):
-                print(f"Received audio chunk of size: {len(frame.audio)} bytes")
-            else:
-                print(f"Received frame: {frame.__class__.__name__}")
-        print("TTS generation completed successfully!")
-    except Exception as e:
-        print(f"Error during TTS generation: {str(e)}")
-if __name__ == "__main__":
-    asyncio.run(test_chirp_tts())

{dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/WHEEL RENAMED Viewed

File without changes

{dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{dv_pipecat_ai-0.0.82.dev881.dist-info → dv_pipecat_ai-0.0.85.dev1.dist-info}/top_level.txt RENAMED Viewed

File without changes

dv-pipecat-ai 0.0.82.dev881__py3-none-any.whl → 0.0.85.dev1__py3-none-any.whl

Potentially problematic release.

dv-pipecat-ai 0.0.82.dev881-py3-none-any.whl → 0.0.85.dev1-py3-none-any.whl