PyPI - dv-pipecat-ai - Versions diffs - 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (195)

{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
pipecat/adapters/base_llm_adapter.py +38 -1
pipecat/adapters/services/anthropic_adapter.py +9 -14
pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
pipecat/adapters/services/bedrock_adapter.py +236 -13
pipecat/adapters/services/gemini_adapter.py +12 -8
pipecat/adapters/services/open_ai_adapter.py +19 -7
pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
pipecat/audio/dtmf/dtmf-0.wav +0 -0
pipecat/audio/dtmf/dtmf-1.wav +0 -0
pipecat/audio/dtmf/dtmf-2.wav +0 -0
pipecat/audio/dtmf/dtmf-3.wav +0 -0
pipecat/audio/dtmf/dtmf-4.wav +0 -0
pipecat/audio/dtmf/dtmf-5.wav +0 -0
pipecat/audio/dtmf/dtmf-6.wav +0 -0
pipecat/audio/dtmf/dtmf-7.wav +0 -0
pipecat/audio/dtmf/dtmf-8.wav +0 -0
pipecat/audio/dtmf/dtmf-9.wav +0 -0
pipecat/audio/dtmf/dtmf-pound.wav +0 -0
pipecat/audio/dtmf/dtmf-star.wav +0 -0
pipecat/audio/filters/krisp_viva_filter.py +193 -0
pipecat/audio/filters/noisereduce_filter.py +15 -0
pipecat/audio/turn/base_turn_analyzer.py +9 -1
pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
pipecat/audio/vad/data/README.md +10 -0
pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
pipecat/audio/vad/silero.py +9 -3
pipecat/audio/vad/vad_analyzer.py +13 -1
pipecat/extensions/voicemail/voicemail_detector.py +5 -5
pipecat/frames/frames.py +277 -86
pipecat/observers/loggers/debug_log_observer.py +3 -3
pipecat/observers/loggers/llm_log_observer.py +7 -3
pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
pipecat/pipeline/runner.py +18 -6
pipecat/pipeline/service_switcher.py +64 -36
pipecat/pipeline/task.py +125 -79
pipecat/pipeline/tts_switcher.py +30 -0
pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
pipecat/processors/aggregators/llm_context.py +40 -2
pipecat/processors/aggregators/llm_response.py +32 -15
pipecat/processors/aggregators/llm_response_universal.py +19 -15
pipecat/processors/aggregators/user_response.py +6 -6
pipecat/processors/aggregators/vision_image_frame.py +24 -2
pipecat/processors/audio/audio_buffer_processor.py +43 -8
pipecat/processors/dtmf_aggregator.py +174 -77
pipecat/processors/filters/stt_mute_filter.py +17 -0
pipecat/processors/frame_processor.py +110 -24
pipecat/processors/frameworks/langchain.py +8 -2
pipecat/processors/frameworks/rtvi.py +210 -68
pipecat/processors/frameworks/strands_agents.py +170 -0
pipecat/processors/logger.py +2 -2
pipecat/processors/transcript_processor.py +26 -5
pipecat/processors/user_idle_processor.py +35 -11
pipecat/runner/daily.py +59 -20
pipecat/runner/run.py +395 -93
pipecat/runner/types.py +6 -4
pipecat/runner/utils.py +51 -10
pipecat/serializers/__init__.py +5 -1
pipecat/serializers/asterisk.py +16 -2
pipecat/serializers/convox.py +41 -4
pipecat/serializers/custom.py +257 -0
pipecat/serializers/exotel.py +5 -5
pipecat/serializers/livekit.py +20 -0
pipecat/serializers/plivo.py +5 -5
pipecat/serializers/protobuf.py +6 -5
pipecat/serializers/telnyx.py +2 -2
pipecat/serializers/twilio.py +43 -23
pipecat/serializers/vi.py +324 -0
pipecat/services/ai_service.py +2 -6
pipecat/services/anthropic/llm.py +2 -25
pipecat/services/assemblyai/models.py +6 -0
pipecat/services/assemblyai/stt.py +13 -5
pipecat/services/asyncai/tts.py +5 -3
pipecat/services/aws/__init__.py +1 -0
pipecat/services/aws/llm.py +147 -105
pipecat/services/aws/nova_sonic/__init__.py +0 -0
pipecat/services/aws/nova_sonic/context.py +436 -0
pipecat/services/aws/nova_sonic/frames.py +25 -0
pipecat/services/aws/nova_sonic/llm.py +1265 -0
pipecat/services/aws/stt.py +3 -3
pipecat/services/aws_nova_sonic/__init__.py +19 -1
pipecat/services/aws_nova_sonic/aws.py +11 -1151
pipecat/services/aws_nova_sonic/context.py +8 -354
pipecat/services/aws_nova_sonic/frames.py +13 -17
pipecat/services/azure/llm.py +51 -1
pipecat/services/azure/realtime/__init__.py +0 -0
pipecat/services/azure/realtime/llm.py +65 -0
pipecat/services/azure/stt.py +15 -0
pipecat/services/cartesia/stt.py +77 -70
pipecat/services/cartesia/tts.py +80 -13
pipecat/services/deepgram/__init__.py +1 -0
pipecat/services/deepgram/flux/__init__.py +0 -0
pipecat/services/deepgram/flux/stt.py +640 -0
pipecat/services/elevenlabs/__init__.py +4 -1
pipecat/services/elevenlabs/stt.py +339 -0
pipecat/services/elevenlabs/tts.py +87 -46
pipecat/services/fish/tts.py +5 -2
pipecat/services/gemini_multimodal_live/events.py +38 -524
pipecat/services/gemini_multimodal_live/file_api.py +23 -173
pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
pipecat/services/gladia/stt.py +56 -72
pipecat/services/google/__init__.py +1 -0
pipecat/services/google/gemini_live/__init__.py +3 -0
pipecat/services/google/gemini_live/file_api.py +189 -0
pipecat/services/google/gemini_live/llm.py +1582 -0
pipecat/services/google/gemini_live/llm_vertex.py +184 -0
pipecat/services/google/llm.py +15 -11
pipecat/services/google/llm_openai.py +3 -3
pipecat/services/google/llm_vertex.py +86 -16
pipecat/services/google/stt.py +4 -0
pipecat/services/google/tts.py +7 -3
pipecat/services/heygen/api.py +2 -0
pipecat/services/heygen/client.py +8 -4
pipecat/services/heygen/video.py +2 -0
pipecat/services/hume/__init__.py +5 -0
pipecat/services/hume/tts.py +220 -0
pipecat/services/inworld/tts.py +6 -6
pipecat/services/llm_service.py +15 -5
pipecat/services/lmnt/tts.py +4 -2
pipecat/services/mcp_service.py +4 -2
pipecat/services/mem0/memory.py +6 -5
pipecat/services/mistral/llm.py +29 -8
pipecat/services/moondream/vision.py +42 -16
pipecat/services/neuphonic/tts.py +5 -2
pipecat/services/openai/__init__.py +1 -0
pipecat/services/openai/base_llm.py +27 -20
pipecat/services/openai/realtime/__init__.py +0 -0
pipecat/services/openai/realtime/context.py +272 -0
pipecat/services/openai/realtime/events.py +1106 -0
pipecat/services/openai/realtime/frames.py +37 -0
pipecat/services/openai/realtime/llm.py +829 -0
pipecat/services/openai/tts.py +49 -10
pipecat/services/openai_realtime/__init__.py +27 -0
pipecat/services/openai_realtime/azure.py +21 -0
pipecat/services/openai_realtime/context.py +21 -0
pipecat/services/openai_realtime/events.py +21 -0
pipecat/services/openai_realtime/frames.py +21 -0
pipecat/services/openai_realtime_beta/azure.py +16 -0
pipecat/services/openai_realtime_beta/openai.py +17 -5
pipecat/services/piper/tts.py +7 -9
pipecat/services/playht/tts.py +34 -4
pipecat/services/rime/tts.py +12 -12
pipecat/services/riva/stt.py +3 -1
pipecat/services/salesforce/__init__.py +9 -0
pipecat/services/salesforce/llm.py +700 -0
pipecat/services/sarvam/__init__.py +7 -0
pipecat/services/sarvam/stt.py +540 -0
pipecat/services/sarvam/tts.py +97 -13
pipecat/services/simli/video.py +2 -2
pipecat/services/speechmatics/stt.py +22 -10
pipecat/services/stt_service.py +47 -0
pipecat/services/tavus/video.py +2 -2
pipecat/services/tts_service.py +75 -22
pipecat/services/vision_service.py +7 -6
pipecat/services/vistaar/llm.py +51 -9
pipecat/tests/utils.py +4 -4
pipecat/transcriptions/language.py +41 -1
pipecat/transports/base_input.py +13 -34
pipecat/transports/base_output.py +140 -104
pipecat/transports/daily/transport.py +199 -26
pipecat/transports/heygen/__init__.py +0 -0
pipecat/transports/heygen/transport.py +381 -0
pipecat/transports/livekit/transport.py +228 -63
pipecat/transports/local/audio.py +6 -1
pipecat/transports/local/tk.py +11 -2
pipecat/transports/network/fastapi_websocket.py +1 -1
pipecat/transports/smallwebrtc/connection.py +103 -19
pipecat/transports/smallwebrtc/request_handler.py +246 -0
pipecat/transports/smallwebrtc/transport.py +65 -23
pipecat/transports/tavus/transport.py +23 -12
pipecat/transports/websocket/client.py +41 -5
pipecat/transports/websocket/fastapi.py +21 -11
pipecat/transports/websocket/server.py +14 -7
pipecat/transports/whatsapp/api.py +8 -0
pipecat/transports/whatsapp/client.py +47 -0
pipecat/utils/base_object.py +54 -22
pipecat/utils/redis.py +58 -0
pipecat/utils/string.py +13 -1
pipecat/utils/tracing/service_decorators.py +21 -21
pipecat/serializers/genesys.py +0 -95
pipecat/services/google/test-google-chirp.py +0 -45
pipecat/services/openai.py +0 -698
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
{dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
/pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0

pipecat/services/vision_service.py CHANGED Viewed

@@ -14,7 +14,8 @@ visual content.
 from abc import abstractmethod
 from typing import AsyncGenerator
-from pipecat.frames.frames import Frame, VisionImageRawFrame
+from pipecat.frames.frames import Frame, LLMContextFrame
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.ai_service import AIService
@@ -37,15 +38,15 @@ class VisionService(AIService):
         self._describe_text = None
     @abstractmethod
-    async def run_vision(self, frame: VisionImageRawFrame) -> AsyncGenerator[Frame, None]:
-        """Process a vision image frame and generate results.
+    async def run_vision(self, context: LLMContext) -> AsyncGenerator[Frame, None]:
+        """Process the latest image in the context and generate results.
         This method must be implemented by subclasses to provide actual computer
         vision functionality such as image description, object detection, or
         visual question answering.
         Args:
-            frame: The vision image frame to process, containing image data.
+            context: The context to process, containing image data.
         Yields:
             Frame: Frames containing the vision analysis results, typically TextFrame
@@ -65,9 +66,9 @@ class VisionService(AIService):
         """
         await super().process_frame(frame, direction)
-        if isinstance(frame, VisionImageRawFrame):
+        if isinstance(frame, LLMContextFrame):
             await self.start_processing_metrics()
-            await self.process_generator(self.run_vision(frame))
+            await self.process_generator(self.run_vision(frame.context))
             await self.stop_processing_metrics()
         else:
             await self.push_frame(frame, direction)

pipecat/services/vistaar/llm.py CHANGED Viewed

@@ -2,10 +2,11 @@
 import asyncio
 import json
+import random
 import time
 import uuid
 from dataclasses import dataclass
-from typing import Any, AsyncGenerator, Dict, Optional
+from typing import Any, AsyncGenerator, Dict, List, Optional
 from urllib.parse import urlencode
 import httpx
@@ -13,13 +14,15 @@ from loguru import logger
 from pydantic import BaseModel, Field
 from pipecat.frames.frames import (
+    CancelFrame,
+    EndFrame,
     Frame,
+    InterruptionFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesFrame,
     LLMTextFrame,
     LLMUpdateSettingsFrame,
-    StartInterruptionFrame,
 )
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
@@ -53,12 +56,13 @@ class VistaarLLMService(LLMService):
             source_lang: Source language code (e.g., 'mr' for Marathi, 'hi' for Hindi).
             target_lang: Target language code for responses.
             session_id: Session ID for maintaining conversation context.
-            extra: Additional model-specific parameters.
+            extra: Additional model-specific parameters
         """
         source_lang: Optional[str] = Field(default="mr")
         target_lang: Optional[str] = Field(default="mr")
         session_id: Optional[str] = Field(default=None)
+        pre_query_response_phrases: Optional[List[str]] = Field(default_factory=list)
         extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
     def __init__(
@@ -68,7 +72,6 @@ class VistaarLLMService(LLMService):
         params: Optional[InputParams] = None,
         timeout: float = 30.0,
         interim_timeout: float = 5.0,
-        interim_message: str = "एक क्षण थांबा, मी बघतो. ",
         **kwargs,
     ):
         """Initialize Vistaar LLM service.
@@ -77,8 +80,7 @@ class VistaarLLMService(LLMService):
             base_url: The base URL for Vistaar API. Defaults to "https://vistaar.kenpath.ai/api".
             params: Input parameters for model configuration and behavior.
             timeout: Request timeout in seconds. Defaults to 30.0 seconds.
-            interim_timeout: Time in seconds before sending interim message. Defaults to 3.0 seconds.
-            interim_message: Message to send if API takes longer than interim_timeout. Defaults to "एक क्षण थांबा, मी बघतो. ".
+            interim_timeout: Time in seconds before sending interim message. Defaults to 5.0 seconds.
             **kwargs: Additional arguments passed to the parent LLMService.
         """
         super().__init__(**kwargs)
@@ -89,10 +91,10 @@ class VistaarLLMService(LLMService):
         self._source_lang = params.source_lang
         self._target_lang = params.target_lang
         self._session_id = params.session_id or str(uuid.uuid4())
+        self._pre_query_response_phrases = params.pre_query_response_phrases or []
         self._extra = params.extra if isinstance(params.extra, dict) else {}
         self._timeout = timeout
         self._interim_timeout = interim_timeout
-        self._interim_message = interim_message
         # Create an async HTTP client
         self._client = httpx.AsyncClient(timeout=httpx.Timeout(self._timeout), verify=False)
@@ -103,6 +105,8 @@ class VistaarLLMService(LLMService):
         self._partial_response = []  # Track what was actually sent before interruption
         self._interim_sent = False  # Track if interim message was sent
         self._interim_task = None  # Track interim message task
+        self._interim_completion_event = asyncio.Event()  # Track interim message completion
+        self._interim_in_progress = False  # Track if interim message is being spoken
         logger.info(
             f"Vistaar LLM initialized - Base URL: {self._base_url}, Session ID: {self._session_id}, Source Lang: {self._source_lang}, Target Lang: {self._target_lang}, Timeout: {self._timeout}s"
@@ -161,6 +165,10 @@ class VistaarLLMService(LLMService):
         # Set interruption flag
         self._is_interrupted = True
+        # Reset interim state on interruption
+        self._interim_in_progress = False
+        self._interim_completion_event.set()  # Unblock any waiting LLM responses
         # Cancel interim message task if active
         await self._cancel_interim_message_task(
             "Cancelled interim message task - handling interruption"
@@ -193,11 +201,28 @@ class VistaarLLMService(LLMService):
             if not self._is_interrupted and not self._interim_sent:
                 logger.info(f"Sending interim message after {self._interim_timeout}s timeout")
                 self._interim_sent = True
-                await self.push_frame(LLMTextFrame(text=self._interim_message))
+                self._interim_in_progress = True
+                # Use random selection from pre_query_response_phrases if available, otherwise fallback to default
+                if self._pre_query_response_phrases:
+                    message = random.choice(self._pre_query_response_phrases)
+                else:
+                    message = "एक क्षण थांबा, मी बघतो. "
+                await self.push_frame(LLMTextFrame(text=message))
+                # Wait for estimated TTS duration before marking as complete
+                estimated_tts_duration = max(2.0, len(message) * 0.08)  # ~80ms per character
+                logger.info(f"Waiting {estimated_tts_duration:.2f}s for interim TTS completion")
+                await asyncio.sleep(estimated_tts_duration)
         except asyncio.CancelledError:
             logger.debug("Interim message task cancelled")
         except Exception as e:
             logger.error(f"Error sending interim message: {e}")
+        finally:
+            # Signal that interim message handling is complete
+            self._interim_completion_event.set()
+            self._interim_in_progress = False
     async def _stream_response(self, query: str) -> AsyncGenerator[str, None]:
         """Stream response from Vistaar API using Server-Sent Events.
@@ -231,6 +256,8 @@ class VistaarLLMService(LLMService):
         self._is_interrupted = False
         self._partial_response = []
         self._interim_sent = False
+        self._interim_in_progress = False
+        self._interim_completion_event.clear()  # Reset the event for new request
         try:
             # Use httpx to handle SSE streaming
@@ -291,6 +318,7 @@ class VistaarLLMService(LLMService):
             # Start response
             await self.push_frame(LLMFullResponseStartFrame())
+            await self.push_frame(LLMFullResponseStartFrame(), FrameDirection.UPSTREAM)
             await self.start_processing_metrics()
             await self.start_ttfb_metrics()
@@ -307,6 +335,15 @@ class VistaarLLMService(LLMService):
                 if first_chunk:
                     await self.stop_ttfb_metrics()
                     first_chunk = False
+                    # Wait for interim message to complete if it was sent and is in progress
+                    if self._interim_sent:
+                        logger.debug(
+                            "Waiting for interim message completion before sending LLM response"
+                        )
+                        await self._interim_completion_event.wait()
+                        logger.debug("Interim message completed, proceeding with LLM response")
                     # Cancel interim message task since we got first response
                     await self._cancel_interim_message_task(
                         "Cancelled interim message task - got first response"
@@ -334,6 +371,7 @@ class VistaarLLMService(LLMService):
             )
             await self.stop_processing_metrics()
             await self.push_frame(LLMFullResponseEndFrame())
+            await self.push_frame(LLMFullResponseEndFrame(), FrameDirection.UPSTREAM)
     async def process_frame(self, frame: Frame, direction: FrameDirection):
         """Process frames for LLM completion requests.
@@ -353,7 +391,7 @@ class VistaarLLMService(LLMService):
             )
             await self.push_frame(frame, direction)
             return
-        elif isinstance(frame, StartInterruptionFrame):
+        elif isinstance(frame, InterruptionFrame):
             await self._handle_interruption()
             await self.push_frame(frame, direction)
             return
@@ -426,3 +464,7 @@ class VistaarLLMService(LLMService):
             await self.cancel_task(self._interim_task)
             self._interim_task = None
             logger.debug(message)
+    def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics."""
+        return True

pipecat/tests/utils.py CHANGED Viewed

@@ -128,7 +128,7 @@ async def run_test(
     expected_up_frames: Optional[Sequence[type]] = None,
     ignore_start: bool = True,
     observers: Optional[List[BaseObserver]] = None,
-    start_metadata: Optional[Dict[str, Any]] = None,
+    pipeline_params: Optional[PipelineParams] = None,
     send_end_frame: bool = True,
 ) -> Tuple[Sequence[Frame], Sequence[Frame]]:
     """Run a test pipeline with the specified processor and validate frame flow.
@@ -144,7 +144,7 @@ async def run_test(
         expected_up_frames: Expected frame types flowing upstream (optional).
         ignore_start: Whether to ignore StartFrames in frame validation.
         observers: Optional list of observers to attach to the pipeline.
-        start_metadata: Optional metadata to include with the StartFrame.
+        pipeline_params: Optional pipeline parameters.
         send_end_frame: Whether to send an EndFrame at the end of the test.
     Returns:
@@ -154,7 +154,7 @@ async def run_test(
         AssertionError: If the received frames don't match the expected frame types.
     """
     observers = observers or []
-    start_metadata = start_metadata or {}
+    pipeline_params = pipeline_params or PipelineParams()
     received_up = asyncio.Queue()
     received_down = asyncio.Queue()
@@ -173,7 +173,7 @@ async def run_test(
     task = PipelineTask(
         pipeline,
-        params=PipelineParams(start_metadata=start_metadata),
+        params=pipeline_params,
         observers=observers,
         cancel_on_idle_timeout=False,
     )

pipecat/transcriptions/language.py CHANGED Viewed

@@ -68,6 +68,9 @@ class Language(StrEnum):
     AS = "as"
     AS_IN = "as-IN"
+    # Asturian
+    AST = "ast"
     # Azerbaijani
     AZ = "az"
     AZ_AZ = "az-AZ"
@@ -101,6 +104,9 @@ class Language(StrEnum):
     CA = "ca"
     CA_ES = "ca-ES"
+    # Cebuano
+    CEB = "ceb"
     # Mandarin Chinese
     CMN = "cmn"
     CMN_CN = "cmn-CN"
@@ -185,6 +191,9 @@ class Language(StrEnum):
     FA = "fa"
     FA_IR = "fa-IR"
+    # Fulah
+    FF = "ff"
     # Finnish
     FI = "fi"
     FI_FI = "fi-FI"
@@ -251,6 +260,9 @@ class Language(StrEnum):
     ID = "id"
     ID_ID = "id-ID"
+    # Igbo
+    IG = "ig"
     # Icelandic
     IS = "is"
     IS_IS = "is-IS"
@@ -279,6 +291,9 @@ class Language(StrEnum):
     KA = "ka"
     KA_GE = "ka-GE"
+    # Kabuverdianu
+    KEA = "kea"
     # Kazakh
     KK = "kk"
     KK_KZ = "kk-KZ"
@@ -295,6 +310,13 @@ class Language(StrEnum):
     KO = "ko"
     KO_KR = "ko-KR"
+    # Kurdish
+    KU = "ku"
+    # Kyrgyz
+    KY = "ky"
+    KY_KG = "ky-KG"
     # Latin
     LA = "la"
@@ -312,6 +334,12 @@ class Language(StrEnum):
     LT = "lt"
     LT_LT = "lt-LT"
+    # Ganda
+    LG = "lg"
+    # Luo
+    LUO = "luo"
     # Latvian
     LV = "lv"
     LV_LV = "lv-LV"
@@ -366,6 +394,12 @@ class Language(StrEnum):
     NL_BE = "nl-BE"
     NL_NL = "nl-NL"
+    # Northern Sotho
+    NSO = "nso"
+    # Chichewa
+    NY = "ny"
     # Occitan
     OC = "oc"
@@ -484,6 +518,9 @@ class Language(StrEnum):
     UK = "uk"
     UK_UA = "uk-UA"
+    # Umbundu
+    UMB = "umb"
     # Urdu
     UR = "ur"
     UR_IN = "ur-IN"
@@ -497,6 +534,9 @@ class Language(StrEnum):
     VI = "vi"
     VI_VN = "vi-VN"
+    # Wolof
+    WO = "wo"
     # Wu Chinese
     WUU = "wuu"
     WUU_CN = "wuu-CN"
@@ -507,7 +547,7 @@ class Language(StrEnum):
     # Yoruba
     YO = "yo"
-    # Yue Chinese
+    # Yue Chinese (Cantonese)
     YUE = "yue"
     YUE_CN = "yue-CN"

pipecat/transports/base_input.py CHANGED Viewed

@@ -11,7 +11,6 @@ input processing, including VAD, turn analysis, and interruption management.
 """
 import asyncio
-from concurrent.futures import ThreadPoolExecutor
 from typing import Optional
 from loguru import logger
@@ -22,7 +21,6 @@ from pipecat.audio.turn.base_turn_analyzer import (
 )
 from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADState
 from pipecat.frames.frames import (
-    BotInterruptionFrame,
     BotStartedSpeakingFrame,
     BotStoppedSpeakingFrame,
     CancelFrame,
@@ -36,7 +34,6 @@ from pipecat.frames.frames import (
     MetricsFrame,
     SpeechControlParamsFrame,
     StartFrame,
-    StartInterruptionFrame,
     StopFrame,
     SystemFrame,
     UserSpeakingFrame,
@@ -81,10 +78,6 @@ class BaseInputTransport(FrameProcessor):
         # Track user speaking state for interruption logic
         self._user_speaking = False
-        # We read audio from a single queue one at a time and we then run VAD in
-        # a thread. Therefore, only one thread should be necessary.
-        self._executor = ThreadPoolExecutor(max_workers=1)
         # Task to process incoming audio (VAD) and push audio frames downstream
         # if passthrough is enabled.
         self._audio_task = None
@@ -239,6 +232,9 @@ class BaseInputTransport(FrameProcessor):
         """
         # Cancel and wait for the audio input task to finish.
         await self._cancel_audio_task()
+        # Stop audio filter.
+        if self._params.audio_in_filter:
+            await self._params.audio_in_filter.stop()
     async def set_transport_ready(self, frame: StartFrame):
         """Called when the transport is ready to stream.
@@ -289,8 +285,6 @@ class BaseInputTransport(FrameProcessor):
         elif isinstance(frame, CancelFrame):
             await self.cancel(frame)
             await self.push_frame(frame, direction)
-        elif isinstance(frame, BotInterruptionFrame):
-            await self._handle_bot_interruption(frame)
         elif isinstance(frame, BotStartedSpeakingFrame):
             await self._handle_bot_started_speaking(frame)
             await self.push_frame(frame, direction)
@@ -298,15 +292,14 @@ class BaseInputTransport(FrameProcessor):
             await self._handle_bot_stopped_speaking(frame)
             await self.push_frame(frame, direction)
         elif isinstance(frame, EmulateUserStartedSpeakingFrame):
-            logger.debug("Emulating user started speaking")
+            self.logger.debug("Emulating user started speaking")
             await self._handle_user_interruption(VADState.SPEAKING, emulated=True)
         elif isinstance(frame, EmulateUserStoppedSpeakingFrame):
-            logger.debug("Emulating user stopped speaking")
+            self.logger.debug("Emulating user stopped speaking")
             await self._handle_user_interruption(VADState.QUIET, emulated=True)
-        # All other system frames
         elif isinstance(frame, VADParamsUpdateFrame):
             if self.vad_analyzer:
-                self.vad_analyzer.set_params(frame.params, bot_logger=self.logger)
+                self.vad_analyzer.set_params(frame.params)
                 speech_frame = SpeechControlParamsFrame(
                     vad_params=frame.params,
                     turn_params=self._params.turn_analyzer.params
@@ -314,6 +307,8 @@ class BaseInputTransport(FrameProcessor):
                     else None,
                 )
                 await self.push_frame(speech_frame)
+            await self.push_frame(frame, direction)
+        # All other system frames
         elif isinstance(frame, SystemFrame):
             await self.push_frame(frame, direction)
         # Control frames
@@ -335,13 +330,6 @@ class BaseInputTransport(FrameProcessor):
     # Handle interruptions
     #
-    async def _handle_bot_interruption(self, frame: BotInterruptionFrame):
-        """Handle bot interruption frames."""
-        self.logger.debug("Bot interruption")
-        if self.interruptions_allowed:
-            await self._start_interruption()
-            await self.push_frame(StartInterruptionFrame())
     async def _handle_user_interruption(self, vad_state: VADState, emulated: bool = False):
         """Handle user interruption events based on speaking state."""
         if vad_state == VADState.SPEAKING:
@@ -353,7 +341,7 @@ class BaseInputTransport(FrameProcessor):
             await self.push_frame(downstream_frame)
             await self.push_frame(upstream_frame, FrameDirection.UPSTREAM)
-            # Only push StartInterruptionFrame if:
+            # Only push InterruptionFrame if:
             # 1. No interruption config is set, OR
             # 2. Interruption config is set but bot is not speaking
             should_push_immediate_interruption = (
@@ -362,13 +350,9 @@ class BaseInputTransport(FrameProcessor):
             # Make sure we notify about interruptions quickly out-of-band.
             if should_push_immediate_interruption and self.interruptions_allowed:
-                await self._start_interruption()
-                # Push an out-of-band frame (i.e. not using the ordered push
-                # frame task) to stop everything, specially at the output
-                # transport.
-                await self.push_frame(StartInterruptionFrame())
+                await self.push_interruption_task_frame_and_wait()
             elif self.interruption_strategies and self._bot_speaking:
-                logger.debug(
+                self.logger.debug(
                     "User started speaking while bot is speaking with interruption config - "
                     "deferring interruption to aggregator"
                 )
@@ -381,9 +365,6 @@ class BaseInputTransport(FrameProcessor):
             await self.push_frame(downstream_frame)
             await self.push_frame(upstream_frame, FrameDirection.UPSTREAM)
-            if self.interruptions_allowed:
-                await self._stop_interruption()
     #
     # Handle bot speaking state
     #
@@ -416,9 +397,7 @@ class BaseInputTransport(FrameProcessor):
         """Analyze audio frame for voice activity."""
         state = VADState.QUIET
         if self.vad_analyzer:
-            state = await self.get_event_loop().run_in_executor(
-                self._executor, self.vad_analyzer.analyze_audio, audio_frame.audio
-            )
+            state = await self.vad_analyzer.analyze_audio(audio_frame.audio)
         return state
     async def _handle_vad(self, audio_frame: InputAudioRawFrame, vad_state: VADState) -> VADState:
@@ -511,7 +490,7 @@ class BaseInputTransport(FrameProcessor):
                 self._audio_in_queue.task_done()
             except asyncio.TimeoutError:
                 if self._user_speaking:
-                    logger.warning(
+                    self.logger.warning(
                         "Forcing user stopped speaking due to timeout receiving audio frame!"
                     )
                     vad_state = VADState.QUIET

dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

Potentially problematic release.

dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl