dv-pipecat-ai 0.0.85.dev831__py3-none-any.whl → 0.0.85.dev833__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dv-pipecat-ai
3
- Version: 0.0.85.dev831
3
+ Version: 0.0.85.dev833
4
4
  Summary: An open source framework for voice (and multimodal) assistants
5
5
  License-Expression: BSD-2-Clause
6
6
  Project-URL: Source, https://github.com/pipecat-ai/pipecat
@@ -1,4 +1,4 @@
1
- dv_pipecat_ai-0.0.85.dev831.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
1
+ dv_pipecat_ai-0.0.85.dev833.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
2
2
  pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
3
3
  pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
4
4
  pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -205,7 +205,7 @@ pipecat/services/azure/realtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
205
205
  pipecat/services/azure/realtime/llm.py,sha256=MnDiw-YJP3kll1gbkta4z4vsWfWZ5oBprZCinMP9O0M,2385
206
206
  pipecat/services/cartesia/__init__.py,sha256=vzh0jBnfPwWdxFfV-tu0x1HFoOTgr9s91GYmD-CJUtY,284
207
207
  pipecat/services/cartesia/stt.py,sha256=00k9gQYo_xPKb-RRJ-RNV4LPFw-7xXiFU7ACFLYttWY,12388
208
- pipecat/services/cartesia/tts.py,sha256=Fh6hm5AUj2rNX8J4UOjHA7uAPIGcie1Dyxv5WBvV1OY,26279
208
+ pipecat/services/cartesia/tts.py,sha256=I_OZCINywkDXmYzFL35MjSN8cAuNEaJs7nj0YB_obtc,27008
209
209
  pipecat/services/cerebras/__init__.py,sha256=5zBmqq9Zfcl-HC7ylekVS5qrRedbl1mAeEwUT-T-c_o,259
210
210
  pipecat/services/cerebras/llm.py,sha256=-yzSe_6YDGigwzES-LZS4vNXMPugmvsIYEpTySyr5nA,3047
211
211
  pipecat/services/deepgram/__init__.py,sha256=IjRtMI7WytRDdmYVpk2qDWClXUiNgdl7ZkvEAWg1eYE,304
@@ -353,7 +353,7 @@ pipecat/transcriptions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
353
353
  pipecat/transcriptions/language.py,sha256=-mWI1MiZbasuoqZTOBH69dAmoM7-UJzWq9rSCcrnmh4,8228
354
354
  pipecat/transports/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
355
355
  pipecat/transports/base_input.py,sha256=WGtoXXlF3GIjYgjtYnAgi8nZozd5abNlGNjwRnz8FRs,20138
356
- pipecat/transports/base_output.py,sha256=mNlIOo7tETlbYPbDyOtA2H-TkBGFKmjuCMDzQUtiwmk,35423
356
+ pipecat/transports/base_output.py,sha256=7WoXtAQAi-3OC9PC_zk61lCWlBTk5-NuTLUbsQUAI_U,36723
357
357
  pipecat/transports/base_transport.py,sha256=JlNiH0DysTfr6azwHauJqY_Z9HJC702O29Q0qrsLrg4,7530
358
358
  pipecat/transports/daily/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
359
359
  pipecat/transports/daily/transport.py,sha256=VanO33ff9g6px-vwGgT6M7cMVg786pOGfMU7Okm7a78,91917
@@ -415,7 +415,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
415
415
  pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
416
416
  pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
417
417
  pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
418
- dv_pipecat_ai-0.0.85.dev831.dist-info/METADATA,sha256=5ahEs864DAPIEEiiv7-7Oa-vRhRN1Ede341NuqED3Sw,32924
419
- dv_pipecat_ai-0.0.85.dev831.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
420
- dv_pipecat_ai-0.0.85.dev831.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
421
- dv_pipecat_ai-0.0.85.dev831.dist-info/RECORD,,
418
+ dv_pipecat_ai-0.0.85.dev833.dist-info/METADATA,sha256=8G_xwlvsUOMNtLW1-haMrK98GNcgh0tMxIuPx3s7aQk,32924
419
+ dv_pipecat_ai-0.0.85.dev833.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
420
+ dv_pipecat_ai-0.0.85.dev833.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
421
+ dv_pipecat_ai-0.0.85.dev833.dist-info/RECORD,,
@@ -93,6 +93,33 @@ def language_to_cartesia_language(language: Language) -> Optional[str]:
93
93
  Language.SV: "sv",
94
94
  Language.TR: "tr",
95
95
  Language.ZH: "zh",
96
+ Language.TL: "tl",
97
+ Language.BG: "bg",
98
+ Language.RO: "ro",
99
+ Language.AR: "ar",
100
+ Language.CS: "cs",
101
+ Language.EL: "el",
102
+ Language.FI: "fi",
103
+ Language.HR: "hr",
104
+ Language.MS: "ms",
105
+ Language.SK: "sk",
106
+ Language.DA: "da",
107
+ Language.TA: "ta",
108
+ Language.UK: "uk",
109
+ Language.HU: "hu",
110
+ Language.NO: "no",
111
+ Language.VI: "vi",
112
+ Language.BN: "bn",
113
+ Language.TH: "th",
114
+ Language.HE: "he",
115
+ Language.KA: "ka",
116
+ Language.ID: "id",
117
+ Language.TE: "te",
118
+ Language.GU: "gu",
119
+ Language.KN: "kn",
120
+ Language.ML: "ml",
121
+ Language.MR: "mr",
122
+ Language.PA: "pa",
96
123
  }
97
124
 
98
125
  result = BASE_LANGUAGES.get(language)
@@ -50,6 +50,11 @@ from pipecat.utils.time import nanoseconds_to_seconds
50
50
 
51
51
  # TODO: When we use GeminiMultimodalLiveLLMService, we need to change this to 0.35, but that creates an issue for faster TTS.
52
52
  BOT_VAD_STOP_SECS = 0.30
53
+ # For the very first bot utterance (e.g., intro), we can safely
54
+ # detect end-of-speech sooner to improve responsiveness for the
55
+ # user’s first short reply. Keep conservative to avoid mid-utterance
56
+ # false stops when TTS streams quickly.
57
+ FIRST_BOT_VAD_STOP_SECS = 0.08
53
58
 
54
59
 
55
60
  class BaseOutputTransport(FrameProcessor):
@@ -406,6 +411,9 @@ class BaseOutputTransport(FrameProcessor):
406
411
  self._bot_speaking_frame_period = 0.2
407
412
  # Last time the bot actually spoke.
408
413
  self._bot_speech_last_time = 0
414
+ # Before the first stop event, we use a shorter silence
415
+ # threshold to make the first turn more responsive.
416
+ self._first_stop_pending = True
409
417
 
410
418
  self._audio_task: Optional[asyncio.Task] = None
411
419
  self._video_task: Optional[asyncio.Task] = None
@@ -631,6 +639,10 @@ class BaseOutputTransport(FrameProcessor):
631
639
 
632
640
  self._bot_speaking = False
633
641
 
642
+ # Mark that the first stop has been completed so subsequent
643
+ # stops use the regular (longer) VAD stop threshold.
644
+ self._first_stop_pending = False
645
+
634
646
  # Clean audio buffer (there could be tiny left overs if not multiple
635
647
  # to our output chunk size).
636
648
  self._audio_buffer = bytearray()
@@ -690,9 +702,14 @@ class BaseOutputTransport(FrameProcessor):
690
702
  async def without_mixer(vad_stop_secs: float) -> AsyncGenerator[Frame, None]:
691
703
  while True:
692
704
  try:
693
- frame = await asyncio.wait_for(
694
- self._audio_queue.get(), timeout=vad_stop_secs
705
+ # Use a shorter timeout only for the first bot stop to
706
+ # accelerate the initial turn handoff right after the intro.
707
+ timeout = (
708
+ FIRST_BOT_VAD_STOP_SECS
709
+ if getattr(self, "_first_stop_pending", True)
710
+ else BOT_VAD_STOP_SECS
695
711
  )
712
+ frame = await asyncio.wait_for(self._audio_queue.get(), timeout=timeout)
696
713
  yield frame
697
714
  self._audio_queue.task_done()
698
715
  except asyncio.TimeoutError:
@@ -713,7 +730,13 @@ class BaseOutputTransport(FrameProcessor):
713
730
  except asyncio.QueueEmpty:
714
731
  # Notify the bot stopped speaking upstream if necessary.
715
732
  diff_time = time.time() - last_frame_time
716
- if diff_time > vad_stop_secs:
733
+ # Use a shorter threshold for the first stop only.
734
+ current_stop_secs = (
735
+ FIRST_BOT_VAD_STOP_SECS
736
+ if getattr(self, "_first_stop_pending", True)
737
+ else BOT_VAD_STOP_SECS
738
+ )
739
+ if diff_time > current_stop_secs:
717
740
  await self._bot_stopped_speaking()
718
741
  # Generate an audio frame with only the mixer's part.
719
742
  frame = OutputAudioRawFrame(