PyPI - dv-pipecat-ai - Versions diffs - 0.0.85.dev831__py3-none-any.whl → 0.0.85.dev833__py3-none-any.whl - Mend

dv-pipecat-ai 0.0.85.dev831__py3-none-any.whl → 0.0.85.dev833__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

{dv_pipecat_ai-0.0.85.dev831.dist-info → dv_pipecat_ai-0.0.85.dev833.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: dv-pipecat-ai
-Version: 0.0.85.dev831
+Version: 0.0.85.dev833
 Summary: An open source framework for voice (and multimodal) assistants
 License-Expression: BSD-2-Clause
 Project-URL: Source, https://github.com/pipecat-ai/pipecat

{dv_pipecat_ai-0.0.85.dev831.dist-info → dv_pipecat_ai-0.0.85.dev833.dist-info}/RECORD RENAMED Viewed

@@ -1,4 +1,4 @@
-dv_pipecat_ai-0.0.85.dev831.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
+dv_pipecat_ai-0.0.85.dev833.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
 pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
 pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -205,7 +205,7 @@ pipecat/services/azure/realtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
 pipecat/services/azure/realtime/llm.py,sha256=MnDiw-YJP3kll1gbkta4z4vsWfWZ5oBprZCinMP9O0M,2385
 pipecat/services/cartesia/__init__.py,sha256=vzh0jBnfPwWdxFfV-tu0x1HFoOTgr9s91GYmD-CJUtY,284
 pipecat/services/cartesia/stt.py,sha256=00k9gQYo_xPKb-RRJ-RNV4LPFw-7xXiFU7ACFLYttWY,12388
-pipecat/services/cartesia/tts.py,sha256=Fh6hm5AUj2rNX8J4UOjHA7uAPIGcie1Dyxv5WBvV1OY,26279
+pipecat/services/cartesia/tts.py,sha256=I_OZCINywkDXmYzFL35MjSN8cAuNEaJs7nj0YB_obtc,27008
 pipecat/services/cerebras/__init__.py,sha256=5zBmqq9Zfcl-HC7ylekVS5qrRedbl1mAeEwUT-T-c_o,259
 pipecat/services/cerebras/llm.py,sha256=-yzSe_6YDGigwzES-LZS4vNXMPugmvsIYEpTySyr5nA,3047
 pipecat/services/deepgram/__init__.py,sha256=IjRtMI7WytRDdmYVpk2qDWClXUiNgdl7ZkvEAWg1eYE,304
@@ -353,7 +353,7 @@ pipecat/transcriptions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
 pipecat/transcriptions/language.py,sha256=-mWI1MiZbasuoqZTOBH69dAmoM7-UJzWq9rSCcrnmh4,8228
 pipecat/transports/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pipecat/transports/base_input.py,sha256=WGtoXXlF3GIjYgjtYnAgi8nZozd5abNlGNjwRnz8FRs,20138
-pipecat/transports/base_output.py,sha256=mNlIOo7tETlbYPbDyOtA2H-TkBGFKmjuCMDzQUtiwmk,35423
+pipecat/transports/base_output.py,sha256=7WoXtAQAi-3OC9PC_zk61lCWlBTk5-NuTLUbsQUAI_U,36723
 pipecat/transports/base_transport.py,sha256=JlNiH0DysTfr6azwHauJqY_Z9HJC702O29Q0qrsLrg4,7530
 pipecat/transports/daily/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pipecat/transports/daily/transport.py,sha256=VanO33ff9g6px-vwGgT6M7cMVg786pOGfMU7Okm7a78,91917
@@ -415,7 +415,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
 pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
 pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
 pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
-dv_pipecat_ai-0.0.85.dev831.dist-info/METADATA,sha256=5ahEs864DAPIEEiiv7-7Oa-vRhRN1Ede341NuqED3Sw,32924
-dv_pipecat_ai-0.0.85.dev831.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-dv_pipecat_ai-0.0.85.dev831.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
-dv_pipecat_ai-0.0.85.dev831.dist-info/RECORD,,
+dv_pipecat_ai-0.0.85.dev833.dist-info/METADATA,sha256=8G_xwlvsUOMNtLW1-haMrK98GNcgh0tMxIuPx3s7aQk,32924
+dv_pipecat_ai-0.0.85.dev833.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+dv_pipecat_ai-0.0.85.dev833.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
+dv_pipecat_ai-0.0.85.dev833.dist-info/RECORD,,

pipecat/services/cartesia/tts.py CHANGED Viewed

@@ -93,6 +93,33 @@ def language_to_cartesia_language(language: Language) -> Optional[str]:
         Language.SV: "sv",
         Language.TR: "tr",
         Language.ZH: "zh",
+        Language.TL: "tl",
+        Language.BG: "bg",
+        Language.RO: "ro",
+        Language.AR: "ar",
+        Language.CS: "cs",
+        Language.EL: "el",
+        Language.FI: "fi",
+        Language.HR: "hr",
+        Language.MS: "ms",
+        Language.SK: "sk",
+        Language.DA: "da",
+        Language.TA: "ta",
+        Language.UK: "uk",
+        Language.HU: "hu",
+        Language.NO: "no",
+        Language.VI: "vi",
+        Language.BN: "bn",
+        Language.TH: "th",
+        Language.HE: "he",
+        Language.KA: "ka",
+        Language.ID: "id",
+        Language.TE: "te",
+        Language.GU: "gu",
+        Language.KN: "kn",
+        Language.ML: "ml",
+        Language.MR: "mr",
+        Language.PA: "pa",
     }
     result = BASE_LANGUAGES.get(language)

pipecat/transports/base_output.py CHANGED Viewed

@@ -50,6 +50,11 @@ from pipecat.utils.time import nanoseconds_to_seconds
 # TODO: When we use GeminiMultimodalLiveLLMService, we need to change this to 0.35 but that creates issue for faster TTS.
 BOT_VAD_STOP_SECS = 0.30
+# For the very first bot utterance (e.g., intro), we can safely
+# detect end-of-speech sooner to improve responsiveness for the
+# user’s first short reply. Keep conservative to avoid mid-utterance
+# false stops when TTS streams quickly.
+FIRST_BOT_VAD_STOP_SECS = 0.08
 class BaseOutputTransport(FrameProcessor):
@@ -406,6 +411,9 @@ class BaseOutputTransport(FrameProcessor):
             self._bot_speaking_frame_period = 0.2
             # Last time the bot actually spoke.
             self._bot_speech_last_time = 0
+            # Before the first stop event, we use a shorter silence
+            # threshold to make the first turn more responsive.
+            self._first_stop_pending = True
             self._audio_task: Optional[asyncio.Task] = None
             self._video_task: Optional[asyncio.Task] = None
@@ -631,6 +639,10 @@ class BaseOutputTransport(FrameProcessor):
             self._bot_speaking = False
+            # Mark that the first stop has been completed so subsequent
+            # stops use the regular (longer) VAD stop threshold.
+            self._first_stop_pending = False
             # Clean audio buffer (there could be tiny left overs if not multiple
             # to our output chunk size).
             self._audio_buffer = bytearray()
@@ -690,9 +702,14 @@ class BaseOutputTransport(FrameProcessor):
             async def without_mixer(vad_stop_secs: float) -> AsyncGenerator[Frame, None]:
                 while True:
                     try:
-                        frame = await asyncio.wait_for(
-                            self._audio_queue.get(), timeout=vad_stop_secs
+                        # Use a shorter timeout only for the first bot stop to
+                        # accelerate the initial turn handoff right after the intro.
+                        timeout = (
+                            FIRST_BOT_VAD_STOP_SECS
+                            if getattr(self, "_first_stop_pending", True)
+                            else BOT_VAD_STOP_SECS
                         )
+                        frame = await asyncio.wait_for(self._audio_queue.get(), timeout=timeout)
                         yield frame
                         self._audio_queue.task_done()
                     except asyncio.TimeoutError:
@@ -713,7 +730,13 @@ class BaseOutputTransport(FrameProcessor):
                     except asyncio.QueueEmpty:
                         # Notify the bot stopped speaking upstream if necessary.
                         diff_time = time.time() - last_frame_time
-                        if diff_time > vad_stop_secs:
+                        # Use a shorter threshold for the first stop only.
+                        current_stop_secs = (
+                            FIRST_BOT_VAD_STOP_SECS
+                            if getattr(self, "_first_stop_pending", True)
+                            else BOT_VAD_STOP_SECS
+                        )
+                        if diff_time > current_stop_secs:
                             await self._bot_stopped_speaking()
                         # Generate an audio frame with only the mixer's part.
                         frame = OutputAudioRawFrame(

{dv_pipecat_ai-0.0.85.dev831.dist-info → dv_pipecat_ai-0.0.85.dev833.dist-info}/WHEEL RENAMED Viewed

File without changes

{dv_pipecat_ai-0.0.85.dev831.dist-info → dv_pipecat_ai-0.0.85.dev833.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{dv_pipecat_ai-0.0.85.dev831.dist-info → dv_pipecat_ai-0.0.85.dev833.dist-info}/top_level.txt RENAMED Viewed

File without changes

dv-pipecat-ai 0.0.85.dev831__py3-none-any.whl → 0.0.85.dev833__py3-none-any.whl

dv-pipecat-ai 0.0.85.dev831__py3-none-any.whl → 0.0.85.dev833__py3-none-any.whl