dv-pipecat-ai 0.0.85.dev831__py3-none-any.whl → 0.0.85.dev833__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dv_pipecat_ai-0.0.85.dev831.dist-info → dv_pipecat_ai-0.0.85.dev833.dist-info}/METADATA +1 -1
- {dv_pipecat_ai-0.0.85.dev831.dist-info → dv_pipecat_ai-0.0.85.dev833.dist-info}/RECORD +7 -7
- pipecat/services/cartesia/tts.py +27 -0
- pipecat/transports/base_output.py +26 -3
- {dv_pipecat_ai-0.0.85.dev831.dist-info → dv_pipecat_ai-0.0.85.dev833.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.85.dev831.dist-info → dv_pipecat_ai-0.0.85.dev833.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.85.dev831.dist-info → dv_pipecat_ai-0.0.85.dev833.dist-info}/top_level.txt +0 -0
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
dv_pipecat_ai-0.0.85.
|
|
1
|
+
dv_pipecat_ai-0.0.85.dev833.dist-info/licenses/LICENSE,sha256=DWY2QGf2eMCFhuu2ChairtT6CB7BEFffNVhXWc4Od08,1301
|
|
2
2
|
pipecat/__init__.py,sha256=j0Xm6adxHhd7D06dIyyPV_GlBYLlBnTAERVvD_jAARQ,861
|
|
3
3
|
pipecat/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
4
4
|
pipecat/adapters/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -205,7 +205,7 @@ pipecat/services/azure/realtime/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm
|
|
|
205
205
|
pipecat/services/azure/realtime/llm.py,sha256=MnDiw-YJP3kll1gbkta4z4vsWfWZ5oBprZCinMP9O0M,2385
|
|
206
206
|
pipecat/services/cartesia/__init__.py,sha256=vzh0jBnfPwWdxFfV-tu0x1HFoOTgr9s91GYmD-CJUtY,284
|
|
207
207
|
pipecat/services/cartesia/stt.py,sha256=00k9gQYo_xPKb-RRJ-RNV4LPFw-7xXiFU7ACFLYttWY,12388
|
|
208
|
-
pipecat/services/cartesia/tts.py,sha256=
|
|
208
|
+
pipecat/services/cartesia/tts.py,sha256=I_OZCINywkDXmYzFL35MjSN8cAuNEaJs7nj0YB_obtc,27008
|
|
209
209
|
pipecat/services/cerebras/__init__.py,sha256=5zBmqq9Zfcl-HC7ylekVS5qrRedbl1mAeEwUT-T-c_o,259
|
|
210
210
|
pipecat/services/cerebras/llm.py,sha256=-yzSe_6YDGigwzES-LZS4vNXMPugmvsIYEpTySyr5nA,3047
|
|
211
211
|
pipecat/services/deepgram/__init__.py,sha256=IjRtMI7WytRDdmYVpk2qDWClXUiNgdl7ZkvEAWg1eYE,304
|
|
@@ -353,7 +353,7 @@ pipecat/transcriptions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
|
|
|
353
353
|
pipecat/transcriptions/language.py,sha256=-mWI1MiZbasuoqZTOBH69dAmoM7-UJzWq9rSCcrnmh4,8228
|
|
354
354
|
pipecat/transports/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
355
355
|
pipecat/transports/base_input.py,sha256=WGtoXXlF3GIjYgjtYnAgi8nZozd5abNlGNjwRnz8FRs,20138
|
|
356
|
-
pipecat/transports/base_output.py,sha256=
|
|
356
|
+
pipecat/transports/base_output.py,sha256=7WoXtAQAi-3OC9PC_zk61lCWlBTk5-NuTLUbsQUAI_U,36723
|
|
357
357
|
pipecat/transports/base_transport.py,sha256=JlNiH0DysTfr6azwHauJqY_Z9HJC702O29Q0qrsLrg4,7530
|
|
358
358
|
pipecat/transports/daily/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
359
359
|
pipecat/transports/daily/transport.py,sha256=VanO33ff9g6px-vwGgT6M7cMVg786pOGfMU7Okm7a78,91917
|
|
@@ -415,7 +415,7 @@ pipecat/utils/tracing/service_decorators.py,sha256=fwzxFpi8DJl6BJbK74G0UEB4ccMJg
|
|
|
415
415
|
pipecat/utils/tracing/setup.py,sha256=7TEgPNpq6M8lww8OQvf0P9FzYc5A30xICGklVA-fua0,2892
|
|
416
416
|
pipecat/utils/tracing/turn_context_provider.py,sha256=ikon3plFOx0XbMrH6DdeHttNpb-U0gzMZIm3bWLc9eI,2485
|
|
417
417
|
pipecat/utils/tracing/turn_trace_observer.py,sha256=dma16SBJpYSOE58YDWy89QzHyQFc_9gQZszKeWixuwc,9725
|
|
418
|
-
dv_pipecat_ai-0.0.85.
|
|
419
|
-
dv_pipecat_ai-0.0.85.
|
|
420
|
-
dv_pipecat_ai-0.0.85.
|
|
421
|
-
dv_pipecat_ai-0.0.85.
|
|
418
|
+
dv_pipecat_ai-0.0.85.dev833.dist-info/METADATA,sha256=8G_xwlvsUOMNtLW1-haMrK98GNcgh0tMxIuPx3s7aQk,32924
|
|
419
|
+
dv_pipecat_ai-0.0.85.dev833.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
420
|
+
dv_pipecat_ai-0.0.85.dev833.dist-info/top_level.txt,sha256=kQzG20CxGf-nSsHmtXHx3hY2-8zHA3jYg8jk0TajqXc,8
|
|
421
|
+
dv_pipecat_ai-0.0.85.dev833.dist-info/RECORD,,
|
pipecat/services/cartesia/tts.py
CHANGED
|
@@ -93,6 +93,33 @@ def language_to_cartesia_language(language: Language) -> Optional[str]:
|
|
|
93
93
|
Language.SV: "sv",
|
|
94
94
|
Language.TR: "tr",
|
|
95
95
|
Language.ZH: "zh",
|
|
96
|
+
Language.TL: "tl",
|
|
97
|
+
Language.BG: "bg",
|
|
98
|
+
Language.RO: "ro",
|
|
99
|
+
Language.AR: "ar",
|
|
100
|
+
Language.CS: "cs",
|
|
101
|
+
Language.EL: "el",
|
|
102
|
+
Language.FI: "fi",
|
|
103
|
+
Language.HR: "hr",
|
|
104
|
+
Language.MS: "ms",
|
|
105
|
+
Language.SK: "sk",
|
|
106
|
+
Language.DA: "da",
|
|
107
|
+
Language.TA: "ta",
|
|
108
|
+
Language.UK: "uk",
|
|
109
|
+
Language.HU: "hu",
|
|
110
|
+
Language.NO: "no",
|
|
111
|
+
Language.VI: "vi",
|
|
112
|
+
Language.BN: "bn",
|
|
113
|
+
Language.TH: "th",
|
|
114
|
+
Language.HE: "he",
|
|
115
|
+
Language.KA: "ka",
|
|
116
|
+
Language.ID: "id",
|
|
117
|
+
Language.TE: "te",
|
|
118
|
+
Language.GU: "gu",
|
|
119
|
+
Language.KN: "kn",
|
|
120
|
+
Language.ML: "ml",
|
|
121
|
+
Language.MR: "mr",
|
|
122
|
+
Language.PA: "pa",
|
|
96
123
|
}
|
|
97
124
|
|
|
98
125
|
result = BASE_LANGUAGES.get(language)
|
|
@@ -50,6 +50,11 @@ from pipecat.utils.time import nanoseconds_to_seconds
|
|
|
50
50
|
|
|
51
51
|
# TODO: When we use GeminiMultimodalLiveLLMService, we need to change this to 0.35, but that creates issues for faster TTS.
|
|
52
52
|
BOT_VAD_STOP_SECS = 0.30
|
|
53
|
+
# For the very first bot utterance (e.g., intro), we can safely
|
|
54
|
+
# detect end-of-speech sooner to improve responsiveness for the
|
|
55
|
+
# user’s first short reply. Keep it conservative to avoid mid-utterance
|
|
56
|
+
# false stops when TTS streams quickly.
|
|
57
|
+
FIRST_BOT_VAD_STOP_SECS = 0.08
|
|
53
58
|
|
|
54
59
|
|
|
55
60
|
class BaseOutputTransport(FrameProcessor):
|
|
@@ -406,6 +411,9 @@ class BaseOutputTransport(FrameProcessor):
|
|
|
406
411
|
self._bot_speaking_frame_period = 0.2
|
|
407
412
|
# Last time the bot actually spoke.
|
|
408
413
|
self._bot_speech_last_time = 0
|
|
414
|
+
# Before the first stop event, we use a shorter silence
|
|
415
|
+
# threshold to make the first turn more responsive.
|
|
416
|
+
self._first_stop_pending = True
|
|
409
417
|
|
|
410
418
|
self._audio_task: Optional[asyncio.Task] = None
|
|
411
419
|
self._video_task: Optional[asyncio.Task] = None
|
|
@@ -631,6 +639,10 @@ class BaseOutputTransport(FrameProcessor):
|
|
|
631
639
|
|
|
632
640
|
self._bot_speaking = False
|
|
633
641
|
|
|
642
|
+
# Mark that the first stop has been completed so subsequent
|
|
643
|
+
# stops use the regular (longer) VAD stop threshold.
|
|
644
|
+
self._first_stop_pending = False
|
|
645
|
+
|
|
634
646
|
# Clean audio buffer (there could be tiny left overs if not multiple
|
|
635
647
|
# to our output chunk size).
|
|
636
648
|
self._audio_buffer = bytearray()
|
|
@@ -690,9 +702,14 @@ class BaseOutputTransport(FrameProcessor):
|
|
|
690
702
|
async def without_mixer(vad_stop_secs: float) -> AsyncGenerator[Frame, None]:
|
|
691
703
|
while True:
|
|
692
704
|
try:
|
|
693
|
-
|
|
694
|
-
|
|
705
|
+
# Use a shorter timeout only for the first bot stop to
|
|
706
|
+
# accelerate the initial turn handoff right after the intro.
|
|
707
|
+
timeout = (
|
|
708
|
+
FIRST_BOT_VAD_STOP_SECS
|
|
709
|
+
if getattr(self, "_first_stop_pending", True)
|
|
710
|
+
else BOT_VAD_STOP_SECS
|
|
695
711
|
)
|
|
712
|
+
frame = await asyncio.wait_for(self._audio_queue.get(), timeout=timeout)
|
|
696
713
|
yield frame
|
|
697
714
|
self._audio_queue.task_done()
|
|
698
715
|
except asyncio.TimeoutError:
|
|
@@ -713,7 +730,13 @@ class BaseOutputTransport(FrameProcessor):
|
|
|
713
730
|
except asyncio.QueueEmpty:
|
|
714
731
|
# Notify the bot stopped speaking upstream if necessary.
|
|
715
732
|
diff_time = time.time() - last_frame_time
|
|
716
|
-
|
|
733
|
+
# Use a shorter threshold for the first stop only.
|
|
734
|
+
current_stop_secs = (
|
|
735
|
+
FIRST_BOT_VAD_STOP_SECS
|
|
736
|
+
if getattr(self, "_first_stop_pending", True)
|
|
737
|
+
else BOT_VAD_STOP_SECS
|
|
738
|
+
)
|
|
739
|
+
if diff_time > current_stop_secs:
|
|
717
740
|
await self._bot_stopped_speaking()
|
|
718
741
|
# Generate an audio frame with only the mixer's part.
|
|
719
742
|
frame = OutputAudioRawFrame(
|
|
File without changes
|
{dv_pipecat_ai-0.0.85.dev831.dist-info → dv_pipecat_ai-0.0.85.dev833.dist-info}/licenses/LICENSE
RENAMED
|
File without changes
|
{dv_pipecat_ai-0.0.85.dev831.dist-info → dv_pipecat_ai-0.0.85.dev833.dist-info}/top_level.txt
RENAMED
|
File without changes
|