dv_pipecat_ai-0.0.85.dev830-py3-none-any.whl → dv_pipecat_ai-0.0.85.dev837-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic.

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dv-pipecat-ai
-Version: 0.0.85.dev830
+Version: 0.0.85.dev837
 Summary: An open source framework for voice (and multimodal) assistants
 License-Expression: BSD-2-Clause
 Project-URL: Source, https://github.com/pipecat-ai/pipecat
@@ -1,4 +1,4 @@
-dv_pipecat_ai-0.0.85.dev830.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
+dv_pipecat_ai-0.0.85.dev837.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
 pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
 pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -107,7 +107,7 @@ pipecat/pipeline/to_be_updated/merge_pipeline.py,sha256=jLEWdufIW3z1xZhdoLowdJ_S
 pipecat/processors/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pipecat/processors/async_generator.py,sha256=qPOZxk5eOad_NrF_Z06vWZ6deXIxb9AKZKYO2e5pkJs,2385
 pipecat/processors/consumer_processor.py,sha256=DrWCKnfblknZJ0bLmR_unIeJ1axQw4IPUn2IB3KLGGA,3228
-pipecat/processors/dtmf_aggregator.py,sha256=mo_IXUlsnVl-_Xn8sbTGnRF4Lkts0h6E3uauGbeFyWs,10204
+pipecat/processors/dtmf_aggregator.py,sha256=k3xYncUr_8y5lrYfeX8PxqlF7jqFLshg_HB6HiFg7TA,10193
 pipecat/processors/frame_processor.py,sha256=uBu6Waa0_diMXdQXMZ5V5a_KwaaPzcieyuv5gO9u-ME,33841
 pipecat/processors/idle_frame_processor.py,sha256=z8AuhGap61lA5K35P6XCaOpn4kkmK_9NZNppbpQxheU,3124
 pipecat/processors/logger.py,sha256=8xa4KKekXQIETlQR7zoGnwUpLNo8CeDVm7YjyXePN-w,2385
@@ -122,7 +122,7 @@ pipecat/processors/aggregators/gated.py,sha256=tii0sRrBkRW6y9Xq5iTWPnqlOEejU4VqP
 pipecat/processors/aggregators/gated_llm_context.py,sha256=CPv6sMA8irD1zZ3fU1gSv6D7qcPvCA0MdpFhBtJ_ekI,3007
 pipecat/processors/aggregators/gated_open_ai_llm_context.py,sha256=DgqmdPj1u3fP_SVmxtfP7NjHqnyhN_RVVTDfmjbkxAs,361
 pipecat/processors/aggregators/llm_context.py,sha256=wNbZA0Vt0FzNc5cu06xiv1z7DIClIlfqR1ZD8EusbVw,11085
-pipecat/processors/aggregators/llm_response.py,sha256=V6wBTzfUGLJfMuI34fkf5VTR0I66AWIW8btxKI8_3IM,48795
+pipecat/processors/aggregators/llm_response.py,sha256=--6D736k5mNnIhmauRbA7ZG7H9tBR16okniz3Mpypns,48573
 pipecat/processors/aggregators/llm_response_universal.py,sha256=5PqmpATpekD8BVWyBExZgatKHsNbZem8M-A7_VwTbiQ,34334
 pipecat/processors/aggregators/openai_llm_context.py,sha256=cC8DXdVPERRN04i0i-1Ys6kusvnbMALeH-Z8Pu5K684,12999
 pipecat/processors/aggregators/sentence.py,sha256=E7e3knfQl6HEGpYMKPklF1aO_gOn-rr7SnynErwfkQk,2235
@@ -153,17 +153,18 @@ pipecat/runner/livekit.py,sha256=in-2Io3FUZV-VcZZ-gQCx9L1WnKp5sHqmm7tDYlFNl4,458
 pipecat/runner/run.py,sha256=McalzMoFYEJJRXyoD5PBAyUhHCdsEeeZJk8lBvplRck,30054
 pipecat/runner/types.py,sha256=zHjbAiU17fG0ypLXCEzPu7bpDOutAg-4gE7TESvK8n0,1761
 pipecat/runner/utils.py,sha256=Ve9rjRvbt1o8e9by0nIrCJzUDGcuJUeYYhkqycmgHXc,18682
-pipecat/serializers/__init__.py,sha256=xcmbbR7YYU5C4HPbo2WVgPij-Bl_qlrLcnunCdpcZkg,804
-pipecat/serializers/asterisk.py,sha256=bPuGuLiCf04_H0d9Gc-5BpEtqD9BRNWnpZZq5MZ1fDY,6091
+pipecat/serializers/__init__.py,sha256=z0V5GflCoPt4k2Yqm4ivuzKDh9VsYYAgK2UXZTw10aU,863
+pipecat/serializers/asterisk.py,sha256=QLJMXkU3DZ0sgFw3Vq2Zf8PHKkQQguL_v-l2Io4lZ_M,6729
 pipecat/serializers/base_serializer.py,sha256=OyBUZccs2ZT9mfkBbq2tGsUJMvci6o-j90Cl1sicPaI,2030
-pipecat/serializers/convox.py,sha256=Irby_iZywgBtevlxiC8nE2GY3eh4yNNRi2YC-0vnNTY,11155
-pipecat/serializers/custom.py,sha256=O0gHTyoSb1AZ_tEmE9VgRViYckmsNzjwCAqt-Xc2CaM,9081
+pipecat/serializers/convox.py,sha256=fj9NkFTB74B9k8qWEuICQNGUQtEV0DusaHohkOqNLa8,11145
+pipecat/serializers/custom.py,sha256=clUEqOazGe3B2XoUFRN9zkFpMd6aIZeVRTqBRHAzavM,9071
 pipecat/serializers/exotel.py,sha256=B04LtNnRMzKmaS61gPZbUjc2nbki3FmpCfUMww6cOe4,5953
 pipecat/serializers/livekit.py,sha256=OMaM7yUiHfeTPbpNxE2TrmIzjmbNQIjNvlujt81dsRI,3285
 pipecat/serializers/plivo.py,sha256=ie6VUhZDTJ7KlAuJyHNeIeMtJ3ScDq_2js1SZtz7jLI,9256
 pipecat/serializers/protobuf.py,sha256=L0jSqvgTdkfxsu6JWjYK8QSTVji9nhzmgRsEEbGU7xY,5223
 pipecat/serializers/telnyx.py,sha256=eFkC7dExDFildYLR8DPvgfHbgXlCwdSPd1vc11yxyok,10847
 pipecat/serializers/twilio.py,sha256=0emSzXVw8DU_N5RPruMekbBKku9Q429-0z1PMuYejSk,10823
+pipecat/serializers/vi.py,sha256=Q7kMXvKM493RIuOUc99LKZWgVmvd8_owAzIK_oEktfw,11150
 pipecat/services/__init__.py,sha256=8e3Ta-8_BOPozhDB3l0GJkNXs5PWhib6yqZQUof2Kvw,1209
 pipecat/services/ai_service.py,sha256=yE386fm2Id-yD4fCNfkmEMtg0lTA7PB17n2x_A_jwTg,5896
 pipecat/services/ai_services.py,sha256=_RrDWfM8adV17atzY9RxK0nXRVM5kbUkKrvN90GAWYM,795
@@ -205,7 +206,7 @@ pipecat/services/azure/realtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
 pipecat/services/azure/realtime/llm.py,sha256=MnDiw-YJP3kll1gbkta4z4vsWfWZ5oBprZCinMP9O0M,2385
 pipecat/services/cartesia/__init__.py,sha256=vzh0jBnfPwWdxFfV-tu0x1HFoOTgr9s91GYmD-CJUtY,284
 pipecat/services/cartesia/stt.py,sha256=00k9gQYo_xPKb-RRJ-RNV4LPFw-7xXiFU7ACFLYttWY,12388
-pipecat/services/cartesia/tts.py,sha256=EdpVJoDhZn7N5hj-VDsCaO-W2MsA78UzOdrHR4G7w08,24355
+pipecat/services/cartesia/tts.py,sha256=I_OZCINywkDXmYzFL35MjSN8cAuNEaJs7nj0YB_obtc,27008
 pipecat/services/cerebras/__init__.py,sha256=5zBmqq9Zfcl-HC7ylekVS5qrRedbl1mAeEwUT-T-c_o,259
 pipecat/services/cerebras/llm.py,sha256=-yzSe_6YDGigwzES-LZS4vNXMPugmvsIYEpTySyr5nA,3047
 pipecat/services/deepgram/__init__.py,sha256=IjRtMI7WytRDdmYVpk2qDWClXUiNgdl7ZkvEAWg1eYE,304
@@ -324,7 +325,7 @@ pipecat/services/sambanova/llm.py,sha256=5XVfPLEk__W8ykFqLdV95ZUhlGGkAaJwmbciLdZ
 pipecat/services/sambanova/stt.py,sha256=ZZgEZ7WQjLFHbCko-3LNTtVajjtfUvbtVLtFcaNadVQ,2536
 pipecat/services/sarvam/__init__.py,sha256=B4TN_tTHV9fWg0aSoPvfQlXISA0nJaQ9-u08I9UWvH4,280
 pipecat/services/sarvam/stt.py,sha256=p9Iq4loMwnftNZ_S0WoFSoX7iBbRKyja6RsVWbpj508,19314
-pipecat/services/sarvam/tts.py,sha256=wzfa0vvmd0wtuzqFSjRbTmHHS8H0L8nP9jkXwqFUJ3A,27638
+pipecat/services/sarvam/tts.py,sha256=lrwfdC53kZ7f2QPgNRxzryISNkrJCvNtlZ-19-iXg94,27610
 pipecat/services/simli/__init__.py,sha256=cbDcqOaGsEgKbGYKpJ1Vv7LN4ZjOWA04sE84WW5vgQI,257
 pipecat/services/simli/video.py,sha256=Zu2XLvl2Y6VHaWzT9wEdzW9d0EYoZyzYLxjQFyV8vho,8320
 pipecat/services/soniox/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -338,7 +339,7 @@ pipecat/services/together/llm.py,sha256=VSayO-U6g9Ld0xK9CXRQPUsd5gWJKtiA8qDAyXgs
 pipecat/services/ultravox/__init__.py,sha256=EoHCSXI2o0DFQslELgkhAGZtxDj63gZi-9ZEhXljaKE,259
 pipecat/services/ultravox/stt.py,sha256=uCQm_-LbycXdXRV6IE1a6Mymis6tyww7V8PnPzAQtx8,16586
 pipecat/services/vistaar/__init__.py,sha256=UFfSWFN5rbzl6NN-E_OH_MFaSYodZWNlenAU0wk-rAI,110
-pipecat/services/vistaar/llm.py,sha256=8jp9BxGYOysmD6CFyof7m2AJRbTDx4KT4kFuUc95wcc,19335
+pipecat/services/vistaar/llm.py,sha256=GNVKaelbpNH7NW7iOpBj2rJjmhMVUsPqfnBI-YgIjjw,19326
 pipecat/services/whisper/__init__.py,sha256=smADmw0Fv98k7cGRuHTEcljKTO2WdZqLpJd0qsTCwH8,281
 pipecat/services/whisper/base_stt.py,sha256=VhslESPnYIeVbmnQTzmlZPV35TH49duxYTvJe0epNnE,7850
 pipecat/services/whisper/stt.py,sha256=9Qd56vWMzg3LtHikQnfgyMtl4odE6BCHDbpAn3HSWjw,17480
@@ -353,7 +354,7 @@ pipecat/transcriptions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
 pipecat/transcriptions/language.py,sha256=-mWI1MiZbasuoqZTOBH69dAmoM7-UJzWq9rSCcrnmh4,8228
 pipecat/transports/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pipecat/transports/base_input.py,sha256=WGtoXXlF3GIjYgjtYnAgi8nZozd5abNlGNjwRnz8FRs,20138
-pipecat/transports/base_output.py,sha256=mNlIOo7tETlbYPbDyOtA2H-TkBGFKmjuCMDzQUtiwmk,35423
+pipecat/transports/base_output.py,sha256=7WoXtAQAi-3OC9PC_zk61lCWlBTk5-NuTLUbsQUAI_U,36723
 pipecat/transports/base_transport.py,sha256=JlNiH0DysTfr6azwHauJqY_Z9HJC702O29Q0qrsLrg4,7530
 pipecat/transports/daily/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pipecat/transports/daily/transport.py,sha256=VanO33ff9g6px-vwGgT6M7cMVg786pOGfMU7Okm7a78,91917
@@ -415,7 +416,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
 pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
 pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
 pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
-dv_pipecat_ai-0.0.85.dev830.dist-info/METADATA,sha256=wPJAPffJo_L5wKNWKbIxlaBG09JAGKUTFl_qkLwmoPw,32924
-dv_pipecat_ai-0.0.85.dev830.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dv_pipecat_ai-0.0.85.dev830.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
-dv_pipecat_ai-0.0.85.dev830.dist-info/RECORD,,
+dv_pipecat_ai-0.0.85.dev837.dist-info/METADATA,sha256=dQC8Y4gHZ3jPBKpybN1R9aKRUbb9mQpb0cPuLQo5KUc,32924
+dv_pipecat_ai-0.0.85.dev837.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dv_pipecat_ai-0.0.85.dev837.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
+dv_pipecat_ai-0.0.85.dev837.dist-info/RECORD,,
@@ -49,7 +49,6 @@ from pipecat.frames.frames import (
     OpenAILLMContextAssistantTimestampFrame,
     SpeechControlParamsFrame,
     StartFrame,
-    StartInterruptionFrame,
     TextFrame,
     TranscriptDropFrame,
     TranscriptionFrame,
@@ -473,8 +472,8 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
             frame: The frame to process.
             direction: The direction of frame flow in the pipeline.
         """
-        if isinstance(frame, StartInterruptionFrame):
-            self.logger.debug("Received StartInterruptionFrame")
+        if isinstance(frame, InterruptionFrame):
+            self.logger.debug("Received InterruptionFrame")
         await super().process_frame(frame, direction)

         if isinstance(frame, StartFrame):
@@ -560,7 +559,7 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
                 "Triggering interruption - pushing BotInterruptionFrame and aggregation"
             )
             # await self.push_frame(BotInterruptionFrame(), FrameDirection.UPSTREAM)
-            await self.push_frame(StartInterruptionFrame(), FrameDirection.DOWNSTREAM)
+            await self.push_frame(InterruptionFrame(), FrameDirection.DOWNSTREAM)
             self.logger.debug("Pushed BotInterruptionFrame")
         # No interruption config - normal behavior (always push aggregation)
         await self._process_aggregation()
@@ -596,12 +595,8 @@ class LLMUserContextAggregator(LLMContextResponseAggregator):
         """Notify upstream processors that pending transcripts should be dropped."""
         if self._pending_transcription_ids:
             drop_frame = TranscriptDropFrame(transcript_ids=list(self._pending_transcription_ids))
-            self.logger.debug(
-                f"Dropping {len(self._pending_transcription_ids)} transcript chunk(s) due to {reason}"
-            )
             await self.push_frame(drop_frame, FrameDirection.UPSTREAM)
             self._pending_transcription_ids.clear()
-        self._aggregation = ""

     async def _start(self, frame: StartFrame):
         self._create_aggregation_task()
@@ -1,3 +1,5 @@
+"""DTMF aggregator processor for collecting and flushing DTMF input digits."""
+
 import asyncio

 from pipecat.frames.frames import (
@@ -8,8 +10,8 @@ from pipecat.frames.frames import (
     EndFrame,
     Frame,
     InputDTMFFrame,
+    InterruptionFrame,
     StartDTMFCaptureFrame,
-    StartInterruptionFrame,
     TranscriptionFrame,
     WaitForDTMFFrame,
 )
@@ -19,10 +21,11 @@ from pipecat.utils.time import time_now_iso8601

 class DTMFAggregator(FrameProcessor):
     """Aggregates DTMF frames using idle wait logic.
+
     The aggregator accumulates digits from incoming InputDTMFFrame instances.
     It flushes the aggregated digits by emitting a TranscriptionFrame when:
     - No new digit arrives within the specified timeout period,
-    - The termination digit (“#”) is received, or
+    - The termination digit ("#") is received, or
     - The number of digits aggregated equals the configured 'digits' value.
     """
@@ -34,7 +37,9 @@ class DTMFAggregator(FrameProcessor):
         digits: int = None,
         **kwargs,
     ):
-        """:param timeout: Idle timeout in seconds before flushing the aggregated digits.
+        """Initialize the DTMF aggregator.
+
+        :param timeout: Idle timeout in seconds before flushing the aggregated digits.
         :param digits: Number of digits to aggregate before flushing.
         """
         super().__init__(**kwargs)
@@ -48,6 +53,7 @@ class DTMFAggregator(FrameProcessor):
         self._dtmf_capture_active = False

     async def process_frame(self, frame: Frame, direction: FrameDirection) -> None:
+        """Process incoming frames and handle DTMF input aggregation."""
         # Handle DTMF frames.
         await super().process_frame(frame, direction)
@@ -69,8 +75,8 @@ class DTMFAggregator(FrameProcessor):
             self._digit_event.set()  # Trigger the timeout handler
             await self._start_dtmf_capture()
             await self.push_frame(frame, direction)
-        elif isinstance(frame, StartInterruptionFrame):
-            self.logger.debug("Received StartInterruptionFrame")
+        elif isinstance(frame, InterruptionFrame):
+            self.logger.debug("Received InterruptionFrame")
             if self._aggregation:
                 await self.flush_aggregation()
             await self._end_dtmf_capture()
@@ -108,9 +114,7 @@ class DTMFAggregator(FrameProcessor):
         if "digits" in settings:
             new_digits = settings["digits"]
             if new_digits != self._digits:
-                self.logger.debug(
-                    f"Updating DTMF digits from {self._digits} to {new_digits}"
-                )
+                self.logger.debug(f"Updating DTMF digits from {self._digits} to {new_digits}")
                 self._digits = new_digits
                 settings_changed = True
@@ -125,9 +129,7 @@ class DTMFAggregator(FrameProcessor):
                 new_end_on = set(end_value)

             if new_end_on != self._end_on:
-                self.logger.debug(
-                    f"Updating DTMF end_on from {self._end_on} to {new_end_on}"
-                )
+                self.logger.debug(f"Updating DTMF end_on from {self._end_on} to {new_end_on}")
                 self._end_on = new_end_on
                 settings_changed = True
@@ -142,9 +144,7 @@ class DTMFAggregator(FrameProcessor):
                 new_reset_on = set(reset_value)

             if new_reset_on != self._reset_on:
-                self.logger.debug(
-                    f"Updating DTMF reset_on from {self._reset_on} to {new_reset_on}"
-                )
+                self.logger.debug(f"Updating DTMF reset_on from {self._reset_on} to {new_reset_on}")
                 self._reset_on = new_reset_on
                 settings_changed = True
@@ -183,9 +183,7 @@ class DTMFAggregator(FrameProcessor):
     def _create_aggregation_task(self, raise_timeout: bool = False) -> None:
         """Creates the aggregation task if it hasn't been created yet."""
         if not self._aggregation_task:
-            self._aggregation_task = self.create_task(
-                self._aggregation_task_handler(raise_timeout)
-            )
+            self._aggregation_task = self.create_task(self._aggregation_task_handler(raise_timeout))

     async def _stop_aggregation_task(self) -> None:
         """Stops the aggregation task."""
@@ -198,9 +196,7 @@ class DTMFAggregator(FrameProcessor):
         while True:
             try:
                 # Wait for a new digit signal with a timeout.
-                await asyncio.wait_for(
-                    self._digit_event.wait(), timeout=self._idle_timeout
-                )
+                await asyncio.wait_for(self._digit_event.wait(), timeout=self._idle_timeout)
                 self._digit_event.clear()
             except asyncio.TimeoutError:
                 # No new digit arrived within the timeout period; flush if needed
@@ -216,7 +212,7 @@ class DTMFAggregator(FrameProcessor):
         aggregated_frame.metadata["push_aggregation"] = True

         # Send interruption frame (as per original design)
-        await self.push_frame(StartInterruptionFrame(), FrameDirection.DOWNSTREAM)
+        await self.push_frame(InterruptionFrame(), FrameDirection.DOWNSTREAM)

         # Push the transcription frame
         await self.push_frame(aggregated_frame, FrameDirection.DOWNSTREAM)
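The flush conditions in the docstring above (idle timeout, terminator digit, digit count) all reduce to the asyncio.Event-plus-wait_for pattern visible in _aggregation_task_handler. A minimal standalone sketch of that pattern follows; the DigitAggregator class, its constructor defaults, and the print-based flush are illustrative, not pipecat APIs.

import asyncio


class DigitAggregator:
    """Minimal sketch of the idle-flush pattern used by DTMFAggregator."""

    def __init__(self, timeout: float = 2.0, end_digit: str = "#", max_digits: int = 4):
        self._timeout = timeout
        self._end_digit = end_digit
        self._max_digits = max_digits
        self._aggregation = ""
        self._event = asyncio.Event()

    async def add_digit(self, digit: str) -> None:
        if digit == self._end_digit:
            await self._flush("terminator")  # flush immediately on "#"
            return
        self._aggregation += digit
        if len(self._aggregation) >= self._max_digits:
            await self._flush("max digits")  # flush when the buffer is full
        else:
            self._event.set()  # signal a new digit, resetting the idle timer

    async def run(self) -> None:
        while True:
            try:
                # Each new digit sets the event; if none arrives in time, flush.
                await asyncio.wait_for(self._event.wait(), timeout=self._timeout)
                self._event.clear()
            except asyncio.TimeoutError:
                if self._aggregation:
                    await self._flush("idle timeout")

    async def _flush(self, reason: str) -> None:
        print(f"flush ({reason}): {self._aggregation!r}")
        self._aggregation = ""


async def main() -> None:
    agg = DigitAggregator()
    runner = asyncio.create_task(agg.run())
    for d in "12":
        await agg.add_digit(d)
    await asyncio.sleep(2.5)  # idle timeout fires, flushing "12"
    runner.cancel()


asyncio.run(main())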
@@ -5,6 +5,7 @@ from .exotel import ExotelFrameSerializer
 from .plivo import PlivoFrameSerializer
 from .telnyx import TelnyxFrameSerializer
 from .twilio import TwilioFrameSerializer
+from .vi import VIFrameSerializer

 __all__ = [
     "FrameSerializer",
@@ -15,6 +16,7 @@ __all__ = [
     "PlivoFrameSerializer",
     "TelnyxFrameSerializer",
     "TwilioFrameSerializer",
+    "VIFrameSerializer",
 ]

 # Optional imports
@@ -1,4 +1,6 @@
 # asterisk_ws_serializer.py
+"""Frame serializer for Asterisk WebSocket communication."""
+
 import base64
 import json
 from typing import Literal, Optional
@@ -12,8 +14,8 @@ from pipecat.frames.frames import (
     EndFrame,
     Frame,
     InputAudioRawFrame,
+    InterruptionFrame,
     StartFrame,
-    StartInterruptionFrame,
     TransportMessageFrame,
     TransportMessageUrgentFrame,
 )
@@ -21,6 +23,8 @@ from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializer


 class AsteriskFrameSerializer(FrameSerializer):
+    """Serializes Pipecat frames to/from Asterisk WebSocket JSON messages."""
+
     class InputParams(BaseModel):
         """Configuration parameters for AsteriskFrameSerializer.

@@ -39,6 +43,12 @@ class AsteriskFrameSerializer(FrameSerializer):
         auto_hang_up: bool = False  # no-op here; adapter handles hangup

     def __init__(self, stream_id: str, params: Optional[InputParams] = None):
+        """Initialize the Asterisk frame serializer.
+
+        Args:
+            stream_id: Unique identifier for the media stream.
+            params: Configuration parameters for the serializer.
+        """
         self._stream_id = stream_id
         self._params = params or AsteriskFrameSerializer.InputParams()
         self._tel_rate = self._params.telephony_sample_rate
@@ -49,13 +59,16 @@ class AsteriskFrameSerializer(FrameSerializer):

     @property
     def type(self) -> FrameSerializerType:
+        """Return the serializer type (TEXT for JSON messages)."""
         return FrameSerializerType.TEXT  # we send/recv JSON strings

     async def setup(self, frame: StartFrame):
+        """Setup the serializer with audio parameters from the StartFrame."""
         self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate

     # Pipecat -> Adapter (play to caller)
     async def serialize(self, frame: Frame) -> str | bytes | None:
+        """Serialize Pipecat frames to Asterisk WebSocket JSON messages."""
         # On pipeline end, ask bridge to hang up
         if (
             self._params.auto_hang_up
@@ -64,7 +77,7 @@ class AsteriskFrameSerializer(FrameSerializer):
         ):
             self._hangup_sent = True
             return json.dumps({"event": "hangup"})
-        if isinstance(frame, StartInterruptionFrame):
+        if isinstance(frame, InterruptionFrame):
             return json.dumps({"event": "clear", "streamId": self._stream_id})
         if isinstance(frame, AudioRawFrame):
             pcm = frame.audio
@@ -114,6 +127,7 @@ class AsteriskFrameSerializer(FrameSerializer):

     # Adapter -> Pipecat (audio from caller)
     async def deserialize(self, data: str | bytes) -> Frame | None:
+        """Deserialize Asterisk WebSocket JSON messages to Pipecat frames."""
         try:
             msg = json.loads(data)
         except Exception:
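The serialize path above defines the control messages the Asterisk bridge receives: a hangup event when the pipeline ends with auto_hang_up enabled, and a clear event on interruption. A tiny sketch of the wire format; the stream ID value is illustrative.

import json

stream_id = "stream-1234"  # illustrative value

# Sent when the pipeline ends and auto_hang_up is enabled.
hangup = json.dumps({"event": "hangup"})

# Sent on an InterruptionFrame to drop any audio the bridge has buffered.
clear = json.dumps({"event": "clear", "streamId": stream_id})

print(hangup)  # {"event": "hangup"}
print(clear)   # {"event": "clear", "streamId": "stream-1234"}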
@@ -22,9 +22,9 @@ from pipecat.frames.frames import (
     Frame,
     InputAudioRawFrame,
     InputDTMFFrame,
+    InterruptionFrame,
     KeypadEntry,
     StartFrame,
-    StartInterruptionFrame,
     TransportMessageFrame,
     TransportMessageUrgentFrame,
 )
@@ -117,7 +117,7 @@ class ConVoxFrameSerializer(FrameSerializer):
             self._call_ended = True
             # Return the callEnd event to be sent via the WebSocket
             return await self._send_call_end_event()
-        elif isinstance(frame, StartInterruptionFrame):
+        elif isinstance(frame, InterruptionFrame):
             # Clear/interrupt command for ConVox
             message = {
                 "event": "clear",
@@ -28,8 +28,8 @@ from pipecat.frames.frames import (
     EndFrame,
     Frame,
     InputAudioRawFrame,
+    InterruptionFrame,
     StartFrame,
-    StartInterruptionFrame,
     TransportMessageFrame,
     TransportMessageUrgentFrame,
 )
@@ -121,7 +121,7 @@ class CustomFrameSerializer(FrameSerializer):
         Returns:
             Serialized data as JSON string, or None if the frame isn't handled.
         """
-        if isinstance(frame, StartInterruptionFrame):
+        if isinstance(frame, InterruptionFrame):
             # Send clear event to instruct client to discard buffered audio
             answer = {"event": "clear", "stream_sid": self._stream_sid}
             return json.dumps(answer)
@@ -0,0 +1,324 @@
+#
+# Copyright (c) 2024–2025, Daily
+#
+# SPDX-License-Identifier: BSD 2-Clause License
+#
+
+"""Vodafone Idea (VI) WebSocket frame serializer for audio streaming and call management."""
+
+import base64
+import json
+from datetime import datetime, timezone
+from typing import Optional
+
+from loguru import logger
+from pydantic import BaseModel
+
+from pipecat.audio.utils import create_default_resampler
+from pipecat.frames.frames import (
+    AudioRawFrame,
+    CancelFrame,
+    EndFrame,
+    Frame,
+    InputAudioRawFrame,
+    InputDTMFFrame,
+    KeypadEntry,
+    StartFrame,
+    StartInterruptionFrame,
+    TransportMessageFrame,
+    TransportMessageUrgentFrame,
+)
+from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
+
+
+class VIFrameSerializer(FrameSerializer):
+    """Serializer for Vodafone Idea (VI) WebSocket protocol.
+
+    This serializer handles converting between Pipecat frames and VI's WebSocket
+    protocol for bidirectional audio streaming. It supports audio conversion, DTMF events,
+    and real-time communication with VI telephony systems.
+
+    VI WebSocket protocol requirements:
+    - PCM audio format at 8kHz sample rate
+    - 16-bit Linear PCM encoding
+    - Base64 encoded audio payloads
+    - JSON message format for control and media events
+    - Bitrate: 128 Kbps
+
+    Events (VI → Endpoint):
+    - connected: WebSocket connection established
+    - start: Stream session started with call/stream IDs
+    - media: Audio data in Base64-encoded PCM
+    - dtmf: Keypad digit pressed
+    - stop: Stream ended
+    - mark: Audio playback checkpoint confirmation
+
+    Events (Endpoint → VI):
+    - media: Send audio back to VI
+    - mark: Request acknowledgment for audio playback
+    - clear: Clear queued audio (interruption)
+    - exit: Terminate session gracefully
+    """
+
+    class InputParams(BaseModel):
+        """Configuration parameters for VIFrameSerializer.
+
+        Attributes:
+            vi_sample_rate: Sample rate used by VI, defaults to 8000 Hz (telephony standard).
+            sample_rate: Optional override for pipeline input sample rate.
+            auto_hang_up: Whether to automatically terminate call on EndFrame.
+        """
+
+        vi_sample_rate: int = 8000
+        sample_rate: Optional[int] = None
+        auto_hang_up: bool = False
+
+    def __init__(
+        self,
+        stream_id: str,
+        call_id: Optional[str] = None,
+        params: Optional[InputParams] = None,
+    ):
+        """Initialize the VIFrameSerializer.
+
+        Args:
+            stream_id: The VI stream identifier.
+            call_id: The associated VI call identifier.
+            params: Configuration parameters.
+        """
+        self._stream_id = stream_id
+        self._call_id = call_id
+        self._params = params or VIFrameSerializer.InputParams()
+
+        self._vi_sample_rate = self._params.vi_sample_rate
+        self._sample_rate = 0  # Pipeline input rate
+        self._call_ended = False
+
+        self._resampler = create_default_resampler()
+
+    @property
+    def type(self) -> FrameSerializerType:
+        """Gets the serializer type.
+
+        Returns:
+            The serializer type as TEXT for JSON WebSocket messages.
+        """
+        return FrameSerializerType.TEXT
+
+    async def setup(self, frame: StartFrame):
+        """Sets up the serializer with pipeline configuration.
+
+        Args:
+            frame: The StartFrame containing pipeline configuration.
+        """
+        self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
+
+    async def serialize(self, frame: Frame) -> str | bytes | None:
+        """Serializes a Pipecat frame to VI WebSocket format.
+
+        Handles conversion of various frame types to VI WebSocket messages.
+        For EndFrames, initiates call termination if auto_hang_up is enabled.
+
+        Args:
+            frame: The Pipecat frame to serialize.
+
+        Returns:
+            Serialized data as JSON string, or None if the frame isn't handled.
+        """
+        if (
+            self._params.auto_hang_up
+            and not self._call_ended
+            and isinstance(frame, (EndFrame, CancelFrame))
+        ):
+            self._call_ended = True
+            # Return the exit event to terminate the VI session
+            return await self._send_exit_event()
+
+        elif isinstance(frame, StartInterruptionFrame):
+            # Clear/interrupt command for VI - clears queued audio
+            message = {
+                "event": "clear",
+                "stream_id": self._stream_id,
+                "call_id": self._call_id,
+            }
+            logger.debug(f"VI: Sending clear event for stream_id: {self._stream_id}")
+            return json.dumps(message)
+
+        elif isinstance(frame, AudioRawFrame):
+            if self._call_ended:
+                logger.debug("VI SERIALIZE: Skipping audio - call has ended")
+                return None
+
+            # Convert PCM audio to VI format
+            data = frame.audio
+
+            # Resample to VI sample rate (8kHz)
+            serialized_data = await self._resampler.resample(
+                data, frame.sample_rate, self._vi_sample_rate
+            )
+
+            # Encode as base64 for transmission
+            payload = base64.b64encode(serialized_data).decode("ascii")
+
+            # VI expects media event format with Base64-encoded PCM audio
+            timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
+
+            message = {
+                "event": "media",
+                "stream_id": self._stream_id,
+                "media": {
+                    "timestamp": timestamp,
+                    "chunk": len(serialized_data),  # Chunk size in bytes
+                    "payload": payload,
+                },
+            }
+
+            return json.dumps(message)
+
+        elif isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
+            # Pass through transport messages (for mark events, etc.)
+            return json.dumps(frame.message)
+
+        return None
+
+    async def _send_exit_event(self):
+        """Send an exit event to VI to terminate the session gracefully.
+
+        This method is called when auto_hang_up is enabled and an EndFrame or
+        CancelFrame is received. The exit event allows IVR logic to continue
+        after the WebSocket session ends.
+        """
+        try:
+            exit_event = {
+                "event": "exit",
+                "stream_id": self._stream_id,
+                "call_id": self._call_id,
+                "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
+            }
+
+            logger.info(
+                f"VI auto_hang_up: Sending exit event for stream_id: {self._stream_id}, call_id: {self._call_id}"
+            )
+            return json.dumps(exit_event)
+        except Exception as e:
+            logger.error(f"VI auto_hang_up: Failed to create exit event: {e}")
+            return None
+
+    async def deserialize(self, data: str | bytes) -> Frame | None:
+        """Deserializes VI WebSocket data to Pipecat frames.
+
+        Handles conversion of VI media events to appropriate Pipecat frames.
+
+        Args:
+            data: The raw WebSocket data from VI.
+
+        Returns:
+            A Pipecat frame corresponding to the VI event, or None if unhandled.
+        """
+        try:
+            message = json.loads(data)
+        except json.JSONDecodeError:
+            logger.error(f"Invalid JSON received from VI: {data}")
+            return None
+
+        # Log all incoming events for debugging and monitoring
+        event = message.get("event")
+        logger.debug(
+            f"VI INCOMING EVENT: {event} - stream_id: {self._stream_id}, call_id: {self._call_id}"
+        )
+
+        if event == "media":
+            # Handle incoming audio data from VI
+            media = message.get("media", {})
+            payload_base64 = media.get("payload")
+
+            if not payload_base64:
+                logger.warning("VI DESERIALIZE: No payload in VI media message")
+                return None
+
+            try:
+                payload = base64.b64decode(payload_base64)
+                chunk_size = len(payload)
+
+                # Log chunk info (optional)
+                logger.debug(
+                    f"VI DESERIALIZE: Received audio from VI - {chunk_size} bytes at {self._vi_sample_rate}Hz"
+                )
+
+            except Exception as e:
+                logger.error(f"VI DESERIALIZE: Error decoding VI audio payload: {e}")
+                return None
+
+            # Convert from VI sample rate (8kHz) to pipeline sample rate
+            deserialized_data = await self._resampler.resample(
+                payload,
+                self._vi_sample_rate,
+                self._sample_rate,
+            )
+
+            audio_frame = InputAudioRawFrame(
+                audio=deserialized_data,
+                num_channels=1,  # VI uses mono audio
+                sample_rate=self._sample_rate,
+            )
+            return audio_frame
+
+        elif event == "dtmf":
+            # Handle DTMF events
+            dtmf_data = message.get("dtmf", {})
+            digit = dtmf_data.get("digit")
+
+            if digit:
+                try:
+                    logger.info(f"VI: Received DTMF digit: {digit}")
+                    return InputDTMFFrame(KeypadEntry(digit))
+                except ValueError:
+                    logger.warning(f"Invalid DTMF digit from VI: {digit}")
+            return None
+
+        elif event == "connected":
+            # Handle connection event
+            logger.info(f"VI connection established: {message}")
+            return None
+
+        elif event == "start":
+            # Handle stream start event
+            logger.info(f"VI stream started: {message}")
+            return None
+
+        elif event == "stop":
+            # Handle stream stop event
+            logger.info(f"VI stream stopped: {message}")
+            # Don't end the call here, wait for explicit exit or call end
+            return None
+
+        elif event == "mark":
+            # Handle mark event - checkpoint confirming audio playback completion
+            mark_data = message.get("mark", {})
+            mark_name = mark_data.get("name", "unknown")
+            logger.info(f"VI mark event received: {mark_name}")
+            # Mark events are informational, no frame to return
+            return None
+
+        elif event == "error":
+            # Handle error events
+            error_msg = message.get("error", "Unknown error")
+            logger.error(f"VI error: {error_msg}")
+            return None
+
+        elif event == "exit":
+            # Handle exit event from VI
+            logger.info("VI exit event received - terminating session")
+            self._call_ended = True
+            return CancelFrame()
+
+        elif event == "call_end" or event == "callEnd":
+            # Handle call end event (if VI sends this)
+            logger.info("VI call end event received")
+            self._call_ended = True
+            return CancelFrame()
+
+        else:
+            logger.debug(f"VI UNHANDLED EVENT: {event}")
+
+        return None
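Per the class docstring and serialize/deserialize paths above, VI media events are JSON envelopes around Base64-encoded 8 kHz PCM. A standalone sketch of building and parsing one such message; the stream ID and PCM bytes are illustrative, and this uses only the standard library rather than the serializer itself.

import base64
import json
from datetime import datetime, timezone

# Build a "media" event the way VIFrameSerializer.serialize() does
# (stream_id and the PCM bytes are illustrative values).
pcm_8k = b"\x00\x01" * 160  # 20 ms of 16-bit mono PCM at 8 kHz
message = {
    "event": "media",
    "stream_id": "stream-abc",
    "media": {
        "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
        "chunk": len(pcm_8k),  # chunk size in bytes
        "payload": base64.b64encode(pcm_8k).decode("ascii"),
    },
}
wire = json.dumps(message)

# Parse it back the way deserialize() does for incoming audio.
parsed = json.loads(wire)
assert parsed["event"] == "media"
audio = base64.b64decode(parsed["media"]["payload"])
assert audio == pcm_8k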
@@ -15,7 +15,6 @@ from typing import AsyncGenerator, List, Literal, Optional, Union
 from loguru import logger
 from pydantic import BaseModel, Field

-
 from pipecat.frames.frames import (
     CancelFrame,
     EndFrame,
@@ -49,6 +48,26 @@ except ModuleNotFoundError as e:
     raise Exception(f"Missing module: {e}")


+class GenerationConfig(BaseModel):
+    """Configuration for Cartesia Sonic-3 generation parameters.
+
+    Sonic-3 interprets these parameters as guidance to ensure natural speech.
+    Test against your content for best results.
+
+    Parameters:
+        volume: Volume multiplier for generated speech. Valid range: [0.5, 2.0]. Default is 1.0.
+        speed: Speed multiplier for generated speech. Valid range: [0.6, 1.5]. Default is 1.0.
+        emotion: Single emotion string to guide the emotional tone. Examples include neutral,
+            angry, excited, content, sad, scared. Over 60 emotions are supported. For best
+            results, use with recommended voices: Leo, Jace, Kyle, Gavin, Maya, Tessa, Dana,
+            and Marian.
+    """
+
+    volume: Optional[float] = None
+    speed: Optional[float] = None
+    emotion: Optional[str] = None
+
+
 def language_to_cartesia_language(language: Language) -> Optional[str]:
     """Convert a Language enum to Cartesia language code.

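GenerationConfig is a plain pydantic model, and the service code added later in this diff serializes it with model_dump(exclude_none=True), so only explicitly set fields reach the request payload. A minimal sketch of that behavior using pydantic alone; the class is re-declared here so the example is self-contained.

from typing import Optional

from pydantic import BaseModel


class GenerationConfig(BaseModel):
    """Mirror of the Sonic-3 generation parameters shown in the diff."""

    volume: Optional[float] = None
    speed: Optional[float] = None
    emotion: Optional[str] = None


config = GenerationConfig(speed=1.2, emotion="excited")

# Unset fields are dropped, so the websocket/HTTP payload stays minimal.
print(config.model_dump(exclude_none=True))  # {'speed': 1.2, 'emotion': 'excited'}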
@@ -74,6 +93,33 @@ def language_to_cartesia_language(language: Language) -> Optional[str]:
         Language.SV: "sv",
         Language.TR: "tr",
         Language.ZH: "zh",
+        Language.TL: "tl",
+        Language.BG: "bg",
+        Language.RO: "ro",
+        Language.AR: "ar",
+        Language.CS: "cs",
+        Language.EL: "el",
+        Language.FI: "fi",
+        Language.HR: "hr",
+        Language.MS: "ms",
+        Language.SK: "sk",
+        Language.DA: "da",
+        Language.TA: "ta",
+        Language.UK: "uk",
+        Language.HU: "hu",
+        Language.NO: "no",
+        Language.VI: "vi",
+        Language.BN: "bn",
+        Language.TH: "th",
+        Language.HE: "he",
+        Language.KA: "ka",
+        Language.ID: "id",
+        Language.TE: "te",
+        Language.GU: "gu",
+        Language.KN: "kn",
+        Language.ML: "ml",
+        Language.MR: "mr",
+        Language.PA: "pa",
     }

     result = BASE_LANGUAGES.get(language)
@@ -102,16 +148,20 @@ class CartesiaTTSService(AudioContextWordTTSService):

        Parameters:
            language: Language to use for synthesis.
-            speed: Voice speed control.
-            emotion: List of emotion controls.
+            speed: Voice speed control for non-Sonic-3 models (literal values).
+            emotion: List of emotion controls for non-Sonic-3 models.

            .. deprecated:: 0.0.68
                The `emotion` parameter is deprecated and will be removed in a future version.
+
+            generation_config: Generation configuration for Sonic-3 models. Includes volume,
+                speed (numeric), and emotion (string) parameters.
        """

        language: Optional[Language] = Language.EN
        speed: Optional[Literal["slow", "normal", "fast"]] = None
        emotion: Optional[List[str]] = []
+        generation_config: Optional[GenerationConfig] = None

    def __init__(
        self,
@@ -120,7 +170,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
         voice_id: str,
         cartesia_version: str = "2025-04-16",
         url: str = "wss://api.cartesia.ai/tts/websocket",
-        model: str = "sonic-2",
+        model: str = "sonic-3",
         sample_rate: Optional[int] = None,
         encoding: str = "pcm_s16le",
         container: str = "raw",
@@ -136,7 +186,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
             voice_id: ID of the voice to use for synthesis.
             cartesia_version: API version string for Cartesia service.
             url: WebSocket URL for Cartesia TTS API.
-            model: TTS model to use (e.g., "sonic-2").
+            model: TTS model to use (e.g., "sonic-3").
             sample_rate: Audio sample rate. If None, uses default.
             encoding: Audio encoding format.
             container: Audio container format.
@@ -180,6 +230,7 @@ class CartesiaTTSService(AudioContextWordTTSService):
             else "en",
             "speed": params.speed,
             "emotion": params.emotion,
+            "generation_config": params.generation_config,
         }
         self.set_model_name(model)
         self.set_voice(voice_id)
@@ -298,6 +349,11 @@ class CartesiaTTSService(AudioContextWordTTSService):
         if self._settings["speed"]:
             msg["speed"] = self._settings["speed"]

+        if self._settings["generation_config"]:
+            msg["generation_config"] = self._settings["generation_config"].model_dump(
+                exclude_none=True
+            )
+
         return json.dumps(msg)

     async def start(self, frame: StartFrame):
@@ -419,7 +475,6 @@ class CartesiaTTSService(AudioContextWordTTSService):
             logger.error(f"{self} error: {msg}")
             await self.push_frame(TTSStoppedFrame())
             await self.stop_all_metrics()
-
             await self.push_error(ErrorFrame(f"{self} error: {msg['error']}"))
             self._context_id = None
         else:
@@ -484,23 +539,27 @@ class CartesiaHttpTTSService(TTSService):

        Parameters:
            language: Language to use for synthesis.
-            speed: Voice speed control.
-            emotion: List of emotion controls.
+            speed: Voice speed control for non-Sonic-3 models (literal values).
+            emotion: List of emotion controls for non-Sonic-3 models.

            .. deprecated:: 0.0.68
                The `emotion` parameter is deprecated and will be removed in a future version.
+
+            generation_config: Generation configuration for Sonic-3 models. Includes volume,
+                speed (numeric), and emotion (string) parameters.
        """

        language: Optional[Language] = Language.EN
        speed: Optional[Literal["slow", "normal", "fast"]] = None
        emotion: Optional[List[str]] = Field(default_factory=list)
+        generation_config: Optional[GenerationConfig] = None

    def __init__(
        self,
        *,
        api_key: str,
        voice_id: str,
-        model: str = "sonic-2",
+        model: str = "sonic-3",
        base_url: str = "https://api.cartesia.ai",
        cartesia_version: str = "2024-11-13",
        sample_rate: Optional[int] = None,
@@ -514,7 +573,7 @@ class CartesiaHttpTTSService(TTSService):
         Args:
             api_key: Cartesia API key for authentication.
             voice_id: ID of the voice to use for synthesis.
-            model: TTS model to use (e.g., "sonic-2").
+            model: TTS model to use (e.g., "sonic-3").
             base_url: Base URL for Cartesia HTTP API.
             cartesia_version: API version string for Cartesia service.
             sample_rate: Audio sample rate. If None, uses default.
@@ -541,6 +600,7 @@ class CartesiaHttpTTSService(TTSService):
             else "en",
             "speed": params.speed,
             "emotion": params.emotion,
+            "generation_config": params.generation_config,
         }
         self.set_voice(voice_id)
         self.set_model_name(model)
@@ -634,6 +694,11 @@ class CartesiaHttpTTSService(TTSService):
         if self._settings["speed"]:
             payload["speed"] = self._settings["speed"]

+        if self._settings["generation_config"]:
+            payload["generation_config"] = self._settings["generation_config"].model_dump(
+                exclude_none=True
+            )
+
         yield TTSStartedFrame()

         session = await self._client._get_session()
@@ -23,7 +23,6 @@ from pipecat.frames.frames import (
     InterruptionFrame,
     LLMFullResponseEndFrame,
     StartFrame,
-    StartInterruptionFrame,
     TTSAudioRawFrame,
     TTSStartedFrame,
     TTSStoppedFrame,
@@ -14,15 +14,15 @@ from loguru import logger
 from pydantic import BaseModel, Field

 from pipecat.frames.frames import (
-    EndFrame,
     CancelFrame,
+    EndFrame,
     Frame,
+    InterruptionFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesFrame,
     LLMTextFrame,
     LLMUpdateSettingsFrame,
-    StartInterruptionFrame,
 )
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
@@ -391,7 +391,7 @@ class VistaarLLMService(LLMService):
             )
             await self.push_frame(frame, direction)
             return
-        elif isinstance(frame, StartInterruptionFrame):
+        elif isinstance(frame, InterruptionFrame):
             await self._handle_interruption()
             await self.push_frame(frame, direction)
             return
@@ -467,4 +467,4 @@ class VistaarLLMService(LLMService):

     def can_generate_metrics(self) -> bool:
         """Check if this service can generate processing metrics."""
-        return True
+        return True
@@ -50,6 +50,11 @@ from pipecat.utils.time import nanoseconds_to_seconds

 # TODO: When we use GeminiMultimodalLiveLLMService, we need to change this to 0.35 but that creates issue for faster TTS.
 BOT_VAD_STOP_SECS = 0.30
+# For the very first bot utterance (e.g., intro), we can safely
+# detect end-of-speech sooner to improve responsiveness for the
+# user’s first short reply. Keep conservative to avoid mid-utterance
+# false stops when TTS streams quickly.
+FIRST_BOT_VAD_STOP_SECS = 0.08


 class BaseOutputTransport(FrameProcessor):
@@ -406,6 +411,9 @@ class BaseOutputTransport(FrameProcessor):
         self._bot_speaking_frame_period = 0.2
         # Last time the bot actually spoke.
         self._bot_speech_last_time = 0
+        # Before the first stop event, we use a shorter silence
+        # threshold to make the first turn more responsive.
+        self._first_stop_pending = True

         self._audio_task: Optional[asyncio.Task] = None
         self._video_task: Optional[asyncio.Task] = None
@@ -631,6 +639,10 @@ class BaseOutputTransport(FrameProcessor):

         self._bot_speaking = False

+        # Mark that the first stop has been completed so subsequent
+        # stops use the regular (longer) VAD stop threshold.
+        self._first_stop_pending = False
+
         # Clean audio buffer (there could be tiny left overs if not multiple
         # to our output chunk size).
         self._audio_buffer = bytearray()
@@ -690,9 +702,14 @@ class BaseOutputTransport(FrameProcessor):
         async def without_mixer(vad_stop_secs: float) -> AsyncGenerator[Frame, None]:
             while True:
                 try:
-                    frame = await asyncio.wait_for(
-                        self._audio_queue.get(), timeout=vad_stop_secs
+                    # Use a shorter timeout only for the first bot stop to
+                    # accelerate the initial turn handoff right after the intro.
+                    timeout = (
+                        FIRST_BOT_VAD_STOP_SECS
+                        if getattr(self, "_first_stop_pending", True)
+                        else BOT_VAD_STOP_SECS
                     )
+                    frame = await asyncio.wait_for(self._audio_queue.get(), timeout=timeout)
                     yield frame
                     self._audio_queue.task_done()
                 except asyncio.TimeoutError:
@@ -713,7 +730,13 @@ class BaseOutputTransport(FrameProcessor):
                 except asyncio.QueueEmpty:
                     # Notify the bot stopped speaking upstream if necessary.
                     diff_time = time.time() - last_frame_time
-                    if diff_time > vad_stop_secs:
+                    # Use a shorter threshold for the first stop only.
+                    current_stop_secs = (
+                        FIRST_BOT_VAD_STOP_SECS
+                        if getattr(self, "_first_stop_pending", True)
+                        else BOT_VAD_STOP_SECS
+                    )
+                    if diff_time > current_stop_secs:
                         await self._bot_stopped_speaking()
                     # Generate an audio frame with only the mixer's part.
                     frame = OutputAudioRawFrame(
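Both hunks above select the silence threshold the same way: the short FIRST_BOT_VAD_STOP_SECS until the first bot-stopped-speaking event, then the regular BOT_VAD_STOP_SECS. A standalone sketch of that selection; the _TransportStub class and wait_for_audio helper are illustrative stand-ins for the transport, with the constants copied from the diff.

import asyncio

BOT_VAD_STOP_SECS = 0.30  # regular end-of-speech silence threshold
FIRST_BOT_VAD_STOP_SECS = 0.08  # shorter threshold for the first bot stop


class _TransportStub:
    """Stub holding only the flag the threshold selection depends on."""

    def __init__(self) -> None:
        self._first_stop_pending = True


async def wait_for_audio(transport: _TransportStub, queue: asyncio.Queue) -> None:
    # Same selection the diff applies in both hunks: short timeout until
    # the first bot-stopped-speaking event, regular timeout afterwards.
    timeout = (
        FIRST_BOT_VAD_STOP_SECS
        if getattr(transport, "_first_stop_pending", True)
        else BOT_VAD_STOP_SECS
    )
    try:
        await asyncio.wait_for(queue.get(), timeout=timeout)
    except asyncio.TimeoutError:
        # Silence exceeded the threshold: treat it as the bot stopping.
        transport._first_stop_pending = False


async def main() -> None:
    transport = _TransportStub()
    queue: asyncio.Queue = asyncio.Queue()
    await wait_for_audio(transport, queue)  # times out after 0.08 s
    print(transport._first_stop_pending)  # False: later stops use 0.30 s


asyncio.run(main())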