dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
- pipecat/audio/vad/silero.py +9 -3
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +277 -86
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +18 -6
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +125 -79
- pipecat/pipeline/tts_switcher.py +30 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_context.py +40 -2
- pipecat/processors/aggregators/llm_response.py +32 -15
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/dtmf_aggregator.py +174 -77
- pipecat/processors/filters/stt_mute_filter.py +17 -0
- pipecat/processors/frame_processor.py +110 -24
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +210 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +26 -5
- pipecat/processors/user_idle_processor.py +35 -11
- pipecat/runner/daily.py +59 -20
- pipecat/runner/run.py +395 -93
- pipecat/runner/types.py +6 -4
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/__init__.py +5 -1
- pipecat/serializers/asterisk.py +16 -2
- pipecat/serializers/convox.py +41 -4
- pipecat/serializers/custom.py +257 -0
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +5 -5
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/serializers/vi.py +324 -0
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/assemblyai/models.py +6 -0
- pipecat/services/assemblyai/stt.py +13 -5
- pipecat/services/asyncai/tts.py +5 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +147 -105
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +436 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1265 -0
- pipecat/services/aws/stt.py +3 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +8 -354
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/llm.py +51 -1
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/stt.py +77 -70
- pipecat/services/cartesia/tts.py +80 -13
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +640 -0
- pipecat/services/elevenlabs/__init__.py +4 -1
- pipecat/services/elevenlabs/stt.py +339 -0
- pipecat/services/elevenlabs/tts.py +87 -46
- pipecat/services/fish/tts.py +5 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/stt.py +4 -0
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +4 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +5 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +49 -10
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/piper/tts.py +7 -9
- pipecat/services/playht/tts.py +34 -4
- pipecat/services/rime/tts.py +12 -12
- pipecat/services/riva/stt.py +3 -1
- pipecat/services/salesforce/__init__.py +9 -0
- pipecat/services/salesforce/llm.py +700 -0
- pipecat/services/sarvam/__init__.py +7 -0
- pipecat/services/sarvam/stt.py +540 -0
- pipecat/services/sarvam/tts.py +97 -13
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +22 -10
- pipecat/services/stt_service.py +47 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +75 -22
- pipecat/services/vision_service.py +7 -6
- pipecat/services/vistaar/llm.py +51 -9
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +13 -34
- pipecat/transports/base_output.py +140 -104
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +103 -19
- pipecat/transports/smallwebrtc/request_handler.py +246 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/redis.py +58 -0
- pipecat/utils/string.py +13 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- pipecat/serializers/genesys.py +0 -95
- pipecat/services/google/test-google-chirp.py +0 -45
- pipecat/services/openai.py +0 -698
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/services/sarvam/tts.py
CHANGED
@@ -20,9 +20,9 @@ from pipecat.frames.frames import (
     EndFrame,
     ErrorFrame,
     Frame,
+    InterruptionFrame,
     LLMFullResponseEndFrame,
     StartFrame,
-    StartInterruptionFrame,
     TTSAudioRawFrame,
     TTSStartedFrame,
     TTSStoppedFrame,
@@ -76,17 +76,29 @@ class SarvamHttpTTSService(TTSService):
 
     Example::
 
-        tts =
+        tts = SarvamHttpTTSService(
             api_key="your-api-key",
             voice_id="anushka",
             model="bulbul:v2",
             aiohttp_session=session,
-            params=
+            params=SarvamHttpTTSService.InputParams(
                 language=Language.HI,
                 pitch=0.1,
                 pace=1.2
             )
         )
+
+        # For bulbul v3 beta with any speaker:
+        tts_v3 = SarvamHttpTTSService(
+            api_key="your-api-key",
+            voice_id="speaker_name",
+            model="bulbul:v3",
+            aiohttp_session=session,
+            params=SarvamHttpTTSService.InputParams(
+                language=Language.HI,
+                temperature=0.8
+            )
+        )
     """
 
     class InputParams(BaseModel):
@@ -105,6 +117,14 @@ class SarvamHttpTTSService(TTSService):
         pace: Optional[float] = Field(default=1.0, ge=0.3, le=3.0)
         loudness: Optional[float] = Field(default=1.0, ge=0.1, le=3.0)
         enable_preprocessing: Optional[bool] = False
+        temperature: Optional[float] = Field(
+            default=0.6,
+            ge=0.01,
+            le=1.0,
+            description="Controls the randomness of the output for bulbul v3 beta. "
+            "Lower values make the output more focused and deterministic, while "
+            "higher values make it more random. Range: 0.01 to 1.0. Default: 0.6.",
+        )
 
     def __init__(
         self,
@@ -124,7 +144,7 @@ class SarvamHttpTTSService(TTSService):
             api_key: Sarvam AI API subscription key.
             aiohttp_session: Shared aiohttp session for making requests.
             voice_id: Speaker voice ID (e.g., "anushka", "meera"). Defaults to "anushka".
-            model: TTS model to use ("bulbul:
+            model: TTS model to use ("bulbul:v2", "bulbul:v3-beta", or "bulbul:v3"). Defaults to "bulbul:v2".
             base_url: Sarvam AI API base URL. Defaults to "https://api.sarvam.ai".
             sample_rate: Audio sample rate in Hz (8000, 16000, 22050, 24000). If None, uses default.
             params: Additional voice and preprocessing parameters. If None, uses defaults.
@@ -138,16 +158,32 @@ class SarvamHttpTTSService(TTSService):
         self._base_url = base_url
         self._session = aiohttp_session
 
+        # Build base settings common to all models
         self._settings = {
             "language": (
                 self.language_to_service_language(params.language) if params.language else "en-IN"
             ),
-            "pitch": params.pitch,
-            "pace": params.pace,
-            "loudness": params.loudness,
             "enable_preprocessing": params.enable_preprocessing,
         }
 
+        # Add model-specific parameters
+        if model in ("bulbul:v3-beta", "bulbul:v3"):
+            self._settings.update(
+                {
+                    "temperature": getattr(params, "temperature", 0.6),
+                    "model": model,
+                }
+            )
+        else:
+            self._settings.update(
+                {
+                    "pitch": params.pitch,
+                    "pace": params.pace,
+                    "loudness": params.loudness,
+                    "model": model,
+                }
+            )
+
         self.set_model_name(model)
         self.set_voice(voice_id)
 
@@ -275,6 +311,18 @@ class SarvamTTSService(InterruptibleTTSService):
                 pace=1.2
             )
         )
+
+        # For bulbul v3 beta with any speaker and temperature:
+        # Note: pace and loudness are not supported for bulbul v3 and bulbul v3 beta
+        tts_v3 = SarvamTTSService(
+            api_key="your-api-key",
+            voice_id="speaker_name",
+            model="bulbul:v3",
+            params=SarvamTTSService.InputParams(
+                language=Language.HI,
+                temperature=0.8
+            )
+        )
     """
 
     class InputParams(BaseModel):
@@ -310,6 +358,14 @@ class SarvamTTSService(InterruptibleTTSService):
         output_audio_codec: Optional[str] = "linear16"
         output_audio_bitrate: Optional[str] = "128k"
         language: Optional[Language] = Language.EN
+        temperature: Optional[float] = Field(
+            default=0.6,
+            ge=0.01,
+            le=1.0,
+            description="Controls the randomness of the output for bulbul v3 beta. "
+            "Lower values make the output more focused and deterministic, while "
+            "higher values make it more random. Range: 0.01 to 1.0. Default: 0.6.",
+        )
 
     def __init__(
         self,
@@ -329,6 +385,7 @@ class SarvamTTSService(InterruptibleTTSService):
         Args:
            api_key: Sarvam API key for authenticating TTS requests.
            model: Identifier of the Sarvam speech model (default "bulbul:v2").
+               Supports "bulbul:v2", "bulbul:v3-beta" and "bulbul:v3".
            voice_id: Voice identifier for synthesis (default "anushka").
            url: WebSocket URL for connecting to the TTS backend (default production URL).
            aiohttp_session: Optional shared aiohttp session. To maintain backward compatibility.
@@ -371,15 +428,12 @@ class SarvamTTSService(InterruptibleTTSService):
         self._api_key = api_key
         self.set_model_name(model)
         self.set_voice(voice_id)
-        #
+        # Build base settings common to all models
         self._settings = {
             "target_language_code": (
                 self.language_to_service_language(params.language) if params.language else "en-IN"
             ),
-            "pitch": params.pitch,
-            "pace": params.pace,
             "speaker": voice_id,
-            "loudness": params.loudness,
             "speech_sample_rate": 0,
             "enable_preprocessing": params.enable_preprocessing,
             "min_buffer_size": params.min_buffer_size,
@@ -387,6 +441,24 @@ class SarvamTTSService(InterruptibleTTSService):
             "output_audio_codec": params.output_audio_codec,
             "output_audio_bitrate": params.output_audio_bitrate,
         }
+
+        # Add model-specific parameters
+        if model in ("bulbul:v3-beta", "bulbul:v3"):
+            self._settings.update(
+                {
+                    "temperature": getattr(params, "temperature", 0.6),
+                    "model": model,
+                }
+            )
+        else:
+            self._settings.update(
+                {
+                    "pitch": params.pitch,
+                    "pace": params.pace,
+                    "loudness": params.loudness,
+                    "model": model,
+                }
+            )
         self._started = False
 
         self._receive_task = None
@@ -455,7 +527,7 @@ class SarvamTTSService(InterruptibleTTSService):
             direction: The direction to push the frame.
         """
         await super().push_frame(frame, direction)
-        if isinstance(frame, (TTSStoppedFrame,
+        if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)):
             self._started = False
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
@@ -525,6 +597,7 @@ class SarvamTTSService(InterruptibleTTSService):
             logger.debug("Connected to Sarvam TTS Websocket")
             await self._send_config()
+            await self._call_event_handler("on_connected")
         except Exception as e:
             logger.error(f"{self} initialization error: {e}")
             self._websocket = None
@@ -556,6 +629,10 @@ class SarvamTTSService(InterruptibleTTSService):
                 await self._websocket.close()
             except Exception as e:
                 logger.error(f"{self} error closing websocket: {e}")
+            finally:
+                self._started = False
+                self._websocket = None
+                await self._call_event_handler("on_disconnected")
 
     def _get_websocket(self):
         if self._websocket:
@@ -605,8 +682,15 @@ class SarvamTTSService(InterruptibleTTSService):
             logger.warning("Service is disconnecting, ignoring text send")
             return
 
+        # Validate text input
+        if not text or not isinstance(text, str) or not text.strip():
+            logger.warning(f"Invalid text input for Sarvam TTS: {repr(text)}")
+            return
+
         if self._websocket and self._websocket.state == State.OPEN:
-            msg = {"type": "text", "data": {"text": text}}
+            msg = {"type": "text", "data": {"text": text.strip()}}
+            logger.info(f"Sarvam TTS: Sending text message: {repr(text.strip())}")
+            logger.debug(f"Sarvam TTS: Full message payload: {msg}")
             await self._websocket.send(json.dumps(msg))
         else:
             logger.warning("WebSocket not ready, cannot send text")
pipecat/services/simli/video.py
CHANGED
@@ -15,8 +15,8 @@ from pipecat.frames.frames import (
     CancelFrame,
     EndFrame,
     Frame,
+    InterruptionFrame,
     OutputImageRawFrame,
-    StartInterruptionFrame,
     TTSAudioRawFrame,
     TTSStoppedFrame,
     UserStartedSpeakingFrame,
@@ -179,7 +179,7 @@ class SimliVideoService(FrameProcessor):
             return
         elif isinstance(frame, (EndFrame, CancelFrame)):
             await self._stop()
-        elif isinstance(frame, (
+        elif isinstance(frame, (InterruptionFrame, UserStartedSpeakingFrame)):
             if not self._previously_interrupted:
                 await self._simli_client.clearBuffer()
                 self._previously_interrupted = self._is_trinity_avatar
pipecat/services/speechmatics/stt.py
CHANGED

@@ -19,7 +19,6 @@ from loguru import logger
 from pydantic import BaseModel
 
 from pipecat.frames.frames import (
-    BotInterruptionFrame,
     CancelFrame,
     EndFrame,
     ErrorFrame,
@@ -551,7 +550,7 @@ class SpeechmaticsSTTService(STTService):
 
         @self._client.on(ServerMessageType.END_OF_UTTERANCE)
         def _evt_on_end_of_utterance(message: dict[str, Any]):
-            logger.debug("End of utterance received from STT")
+            self.logger.debug("End of utterance received from STT")
             asyncio.run_coroutine_threadsafe(
                 self._handle_end_of_utterance(), self.get_event_loop()
             )
@@ -578,6 +577,7 @@ class SpeechmaticsSTTService(STTService):
                 ),
             )
             logger.debug(f"{self} Connected to Speechmatics STT service")
+            await self._call_event_handler("on_connected")
         except Exception as e:
             logger.error(f"{self} Error connecting to Speechmatics: {e}")
             self._client = None
@@ -596,6 +596,7 @@ class SpeechmaticsSTTService(STTService):
             logger.error(f"{self} Error closing Speechmatics client: {e}")
         finally:
             self._client = None
+            await self._call_event_handler("on_disconnected")
 
     def _process_config(self) -> None:
         """Create a formatted STT transcription config.
@@ -619,7 +620,7 @@ class SpeechmaticsSTTService(STTService):
             transcription_config.additional_vocab = [
                 {
                     "content": e.content,
-                    "sounds_like": e.sounds_like,
+                    **({"sounds_like": e.sounds_like} if e.sounds_like else {}),
                 }
                 for e in self._params.additional_vocab
             ]
@@ -749,14 +750,13 @@ class SpeechmaticsSTTService(STTService):
             return
 
         # Frames to send
-        upstream_frames: list[Frame] = []
         downstream_frames: list[Frame] = []
 
         # If VAD is enabled, then send a speaking frame
         if self._params.enable_vad and not self._is_speaking:
             logger.debug("User started speaking")
             self._is_speaking = True
-
+            await self.push_interruption_task_frame_and_wait()
             downstream_frames += [UserStartedSpeakingFrame()]
 
         # If final, then re-parse into TranscriptionFrame
@@ -775,7 +775,7 @@ class SpeechmaticsSTTService(STTService):
             ]
 
             # Log transcript(s)
-            logger.debug(f"Finalized transcript: {[f.text for f in downstream_frames]}")
+            self.logger.debug(f"Finalized transcript: {[f.text for f in downstream_frames]}")
 
         # Return as interim results (unformatted)
         else:
@@ -794,10 +794,6 @@ class SpeechmaticsSTTService(STTService):
             self._is_speaking = False
             downstream_frames += [UserStoppedSpeakingFrame()]
 
-        # Send UPSTREAM frames
-        for frame in upstream_frames:
-            await self.push_frame(frame, FrameDirection.UPSTREAM)
-
         # Send the DOWNSTREAM frames
         for frame in downstream_frames:
             await self.push_frame(frame, FrameDirection.DOWNSTREAM)
@@ -996,6 +992,22 @@ def _language_to_speechmatics_language(language: Language) -> str:
     # List of supported input languages
     BASE_LANGUAGES = {
         Language.AR: "ar",
+        Language.AR_AE: "ar",
+        Language.AR_BH: "ar",
+        Language.AR_DZ: "ar",
+        Language.AR_EG: "ar",
+        Language.AR_IQ: "ar",
+        Language.AR_JO: "ar",
+        Language.AR_KW: "ar",
+        Language.AR_LB: "ar",
+        Language.AR_LY: "ar",
+        Language.AR_MA: "ar",
+        Language.AR_OM: "ar",
+        Language.AR_QA: "ar",
+        Language.AR_SA: "ar",
+        Language.AR_SY: "ar",
+        Language.AR_TN: "ar",
+        Language.AR_YE: "ar",
         Language.BA: "ba",
         Language.EU: "eu",
         Language.BE: "be",
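The additional_vocab fix above only includes sounds_like when it has a value, so entries without pronunciations no longer send "sounds_like": None to the API. A standalone, runnable sketch of that conditional-key idiom; VocabEntry is a hypothetical stand-in for the Speechmatics param type:

    from dataclasses import dataclass
    from typing import List, Optional

    @dataclass
    class VocabEntry:  # hypothetical stand-in for the real param type
        content: str
        sounds_like: Optional[List[str]] = None

    entries = [VocabEntry("gnocchi", ["nyohki", "nokey"]), VocabEntry("pipecat")]

    additional_vocab = [
        {
            "content": e.content,
            # Spread in the key only when the optional field is non-empty.
            **({"sounds_like": e.sounds_like} if e.sounds_like else {}),
        }
        for e in entries
    ]

    print(additional_vocab)
    # [{'content': 'gnocchi', 'sounds_like': ['nyohki', 'nokey']}, {'content': 'pipecat'}]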
pipecat/services/stt_service.py
CHANGED
@@ -16,6 +16,7 @@ from loguru import logger
 from pipecat.frames.frames import (
     AudioRawFrame,
     BotStoppedSpeakingFrame,
+    ErrorFrame,
     Frame,
     StartFrame,
     STTMuteFrame,
@@ -25,6 +26,7 @@ from pipecat.frames.frames import (
 )
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.ai_service import AIService
+from pipecat.services.websocket_service import WebsocketService
 from pipecat.transcriptions.language import Language
 
 
@@ -34,6 +36,25 @@ class STTService(AIService):
     Provides common functionality for STT services including audio passthrough,
     muting, settings management, and audio processing. Subclasses must implement
     the run_stt method to provide actual speech recognition.
+
+    Event handlers:
+        on_connected: Called when connected to the STT service.
+        on_disconnected: Called when disconnected from the STT service.
+        on_connection_error: Called when a connection error to the STT service occurs.
+
+    Example::
+
+        @stt.event_handler("on_connected")
+        async def on_connected(stt: STTService):
+            logger.debug(f"STT connected")
+
+        @stt.event_handler("on_disconnected")
+        async def on_disconnected(stt: STTService):
+            logger.debug(f"STT disconnected")
+
+        @stt.event_handler("on_connection_error")
+        async def on_connection_error(stt: STTService, error: str):
+            logger.error(f"STT connection error: {error}")
     """
 
     def __init__(
@@ -64,6 +85,10 @@ class STTService(AIService):
         self._voicemail_detect: bool = False
         self._user_id: str = ""
 
+        self._register_event_handler("on_connected")
+        self._register_event_handler("on_disconnected")
+        self._register_event_handler("on_connection_error")
+
     @property
     def is_muted(self) -> bool:
         """Check if the STT service is currently muted.
@@ -298,3 +323,25 @@ class SegmentedSTTService(STTService):
         if not self._user_speaking and len(self._audio_buffer) > self._audio_buffer_size_1s:
             discarded = len(self._audio_buffer) - self._audio_buffer_size_1s
             self._audio_buffer = self._audio_buffer[discarded:]
+
+
+class WebsocketSTTService(STTService, WebsocketService):
+    """Base class for websocket-based STT services.
+
+    Combines STT functionality with websocket connectivity, providing automatic
+    error handling and reconnection capabilities.
+    """
+
+    def __init__(self, *, reconnect_on_error: bool = True, **kwargs):
+        """Initialize the Websocket STT service.
+
+        Args:
+            reconnect_on_error: Whether to automatically reconnect on websocket errors.
+            **kwargs: Additional arguments passed to parent classes.
+        """
+        STTService.__init__(self, **kwargs)
+        WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
+
+    async def _report_error(self, error: ErrorFrame):
+        await self._call_event_handler("on_connection_error", error.error)
+        await self.push_error(error)
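The new WebsocketSTTService base combines STTService with WebsocketService and routes _report_error to the on_connection_error event. A minimal subclass sketch, under the assumption that, as in other pipecat STT services, run_stt receives raw audio bytes and yields frames; the provider wiring here is hypothetical:

    from typing import AsyncGenerator

    from pipecat.frames.frames import Frame
    from pipecat.services.stt_service import WebsocketSTTService

    class MyWebsocketSTTService(WebsocketSTTService):
        def __init__(self, *, api_key: str, **kwargs):
            # Inherited flag: reconnect automatically on websocket errors.
            super().__init__(reconnect_on_error=True, **kwargs)
            self._api_key = api_key

        async def run_stt(self, audio: bytes) -> AsyncGenerator[Frame, None]:
            # Send audio over the provider websocket here; transcription
            # frames typically arrive on a separate receive task rather
            # than being yielded inline.
            yield None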
pipecat/services/tavus/video.py
CHANGED
@@ -23,12 +23,12 @@ from pipecat.frames.frames import (
     CancelFrame,
     EndFrame,
     Frame,
+    InterruptionFrame,
     OutputAudioRawFrame,
     OutputImageRawFrame,
     OutputTransportReadyFrame,
     SpeechOutputAudioRawFrame,
     StartFrame,
-    StartInterruptionFrame,
     TTSAudioRawFrame,
     TTSStartedFrame,
 )
@@ -222,7 +222,7 @@ class TavusVideoService(AIService):
         """
         await super().process_frame(frame, direction)
 
-        if isinstance(frame,
+        if isinstance(frame, InterruptionFrame):
             await self._handle_interruptions()
             await self.push_frame(frame, direction)
         elif isinstance(frame, TTSAudioRawFrame):
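The simli and tavus diffs above (and the tts_service diff below) all make the same mechanical substitution: StartInterruptionFrame checks become InterruptionFrame checks, tracking the frames.py rework in the file list. A migration sketch for a custom processor following the same pattern; the processor class is illustrative, the imports are the real pipecat ones:

    from pipecat.frames.frames import Frame, InterruptionFrame
    from pipecat.processors.frame_processor import FrameDirection, FrameProcessor

    class MyProcessor(FrameProcessor):
        async def process_frame(self, frame: Frame, direction: FrameDirection):
            await super().process_frame(frame, direction)
            if isinstance(frame, InterruptionFrame):  # was: StartInterruptionFrame
                ...  # drop any buffered output, reset local state
            await self.push_frame(frame, direction)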
pipecat/services/tts_service.py
CHANGED
@@ -8,7 +8,18 @@
 
 import asyncio
 from abc import abstractmethod
-from typing import
+from typing import (
+    Any,
+    AsyncGenerator,
+    AsyncIterator,
+    Callable,
+    Dict,
+    List,
+    Mapping,
+    Optional,
+    Sequence,
+    Tuple,
+)
 
 from loguru import logger
 
@@ -20,10 +31,10 @@ from pipecat.frames.frames import (
     ErrorFrame,
     Frame,
     InterimTranscriptionFrame,
+    InterruptionFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     StartFrame,
-    StartInterruptionFrame,
     TextFrame,
     TranscriptionFrame,
     TTSAudioRawFrame,
@@ -49,6 +60,25 @@ class TTSService(AIService):
     Provides common functionality for TTS services including text aggregation,
     filtering, audio generation, and frame management. Supports configurable
     sentence aggregation, silence insertion, and frame processing control.
+
+    Event handlers:
+        on_connected: Called when connected to the TTS service.
+        on_disconnected: Called when disconnected from the TTS service.
+        on_connection_error: Called when a connection error to the TTS service occurs.
+
+    Example::
+
+        @tts.event_handler("on_connected")
+        async def on_connected(tts: TTSService):
+            logger.debug(f"TTS connected")
+
+        @tts.event_handler("on_disconnected")
+        async def on_disconnected(tts: TTSService):
+            logger.debug(f"TTS disconnected")
+
+        @tts.event_handler("on_connection_error")
+        async def on_connection_error(tts: TTSService, error: str):
+            logger.error(f"TTS connection error: {error}")
     """
 
     def __init__(
@@ -98,6 +128,7 @@ class TTSService(AIService):
 
             .. deprecated:: 0.0.59
                 Use `text_filters` instead, which allows multiple filters.
+            text_formatter: Optional callable receiving text and language code, returning formatted text.
 
             transport_destination: Destination for generated audio frames.
             **kwargs: Additional arguments passed to the parent AIService.
@@ -124,7 +155,6 @@ class TTSService(AIService):
 
         self._tracing_enabled: bool = False
 
-
         if text_filter:
             import warnings
 
@@ -143,6 +173,10 @@ class TTSService(AIService):
 
         self._processing_text: bool = False
 
+        self._register_event_handler("on_connected")
+        self._register_event_handler("on_disconnected")
+        self._register_event_handler("on_connection_error")
+
     @property
     def sample_rate(self) -> int:
         """Get the current sample rate for audio output.
@@ -319,7 +353,7 @@ class TTSService(AIService):
             and not isinstance(frame, TranscriptionFrame)
         ):
             await self._process_text_frame(frame)
-        elif isinstance(frame,
+        elif isinstance(frame, InterruptionFrame):
             await self._handle_interruption(frame, direction)
             await self.push_frame(frame, direction)
         elif isinstance(frame, (LLMFullResponseEndFrame, EndFrame)):
@@ -377,14 +411,44 @@ class TTSService(AIService):
         await super().push_frame(frame, direction)
 
         if self._push_stop_frames and (
-            isinstance(frame,
+            isinstance(frame, InterruptionFrame)
             or isinstance(frame, TTSStartedFrame)
             or isinstance(frame, TTSAudioRawFrame)
             or isinstance(frame, TTSStoppedFrame)
         ):
             await self._stop_frame_queue.put(frame)
 
-    async def
+    async def _stream_audio_frames_from_iterator(
+        self, iterator: AsyncIterator[bytes], *, strip_wav_header: bool
+    ) -> AsyncGenerator[Frame, None]:
+        buffer = bytearray()
+        need_to_strip_wav_header = strip_wav_header
+        async for chunk in iterator:
+            if need_to_strip_wav_header and chunk.startswith(b"RIFF"):
+                chunk = chunk[44:]
+                need_to_strip_wav_header = False
+
+            # Append to current buffer.
+            buffer.extend(chunk)
+
+            # Round to nearest even number.
+            aligned_length = len(buffer) & ~1  # 111111111...11110
+            if aligned_length > 0:
+                aligned_chunk = buffer[:aligned_length]
+                buffer = buffer[aligned_length:]  # keep any leftover byte
+
+                if len(aligned_chunk) > 0:
+                    frame = TTSAudioRawFrame(bytes(aligned_chunk), self.sample_rate, 1)
+                    yield frame
+
+        if len(buffer) > 0:
+            # Make sure we don't need an extra padding byte.
+            if len(buffer) % 2 == 1:
+                buffer.extend(b"\x00")
+            frame = TTSAudioRawFrame(bytes(buffer), self.sample_rate, 1)
+            yield frame
+
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
         self._processing_text = False
         await self._text_aggregator.handle_interruption()
         for filter in self._text_filters:
@@ -465,7 +529,7 @@ class TTSService(AIService):
             )
             if isinstance(frame, TTSStartedFrame):
                 has_started = True
-            elif isinstance(frame, (TTSStoppedFrame,
+            elif isinstance(frame, (TTSStoppedFrame, InterruptionFrame)):
                 has_started = False
         except asyncio.TimeoutError:
             if has_started:
@@ -550,7 +614,7 @@ class WordTTSService(TTSService):
         elif isinstance(frame, (LLMFullResponseEndFrame, EndFrame)):
             await self.flush_audio()
 
-    async def _handle_interruption(self, frame:
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
         await super()._handle_interruption(frame, direction)
         self._llm_response_started = False
         self.reset_word_timestamps()
@@ -613,7 +677,6 @@ class WebsocketTTSService(TTSService, WebsocketService):
         """
         TTSService.__init__(self, **kwargs)
         WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
-        self._register_event_handler("on_connection_error")
 
     async def _report_error(self, error: ErrorFrame):
         await self._call_event_handler("on_connection_error", error.error)
@@ -640,7 +703,7 @@ class InterruptibleTTSService(WebsocketTTSService):
         # user interrupts we need to reconnect.
         self._bot_speaking = False
 
-    async def _handle_interruption(self, frame:
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
         await super()._handle_interruption(frame, direction)
         if self._bot_speaking:
             await self._disconnect()
@@ -665,15 +728,6 @@ class WebsocketWordTTSService(WordTTSService, WebsocketService):
     """Base class for websocket-based TTS services that support word timestamps.
 
     Combines word timestamp functionality with websocket connectivity.
-
-    Event handlers:
-        on_connection_error: Called when a websocket connection error occurs.
-
-    Example::
-
-        @tts.event_handler("on_connection_error")
-        async def on_connection_error(tts: TTSService, error: str):
-            logger.error(f"TTS connection error: {error}")
     """
 
     def __init__(self, *, reconnect_on_error: bool = True, **kwargs):
@@ -685,7 +739,6 @@ class WebsocketWordTTSService(WordTTSService, WebsocketService):
         """
         WordTTSService.__init__(self, **kwargs)
         WebsocketService.__init__(self, reconnect_on_error=reconnect_on_error, **kwargs)
-        self._register_event_handler("on_connection_error")
 
     async def _report_error(self, error: ErrorFrame):
         await self._call_event_handler("on_connection_error", error.error)
@@ -712,7 +765,7 @@ class InterruptibleWordTTSService(WebsocketWordTTSService):
         # user interrupts we need to reconnect.
         self._bot_speaking = False
 
-    async def _handle_interruption(self, frame:
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
         await super()._handle_interruption(frame, direction)
         if self._bot_speaking:
             await self._disconnect()
@@ -840,7 +893,7 @@ class AudioContextWordTTSService(WebsocketWordTTSService):
         await super().cancel(frame)
         await self._stop_audio_context_task()
 
-    async def _handle_interruption(self, frame:
+    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
        await super()._handle_interruption(frame, direction)
        await self._stop_audio_context_task()
        self._create_audio_context_task()
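The new _stream_audio_frames_from_iterator helper is mostly about sample alignment: 16-bit PCM frames must hold an even number of bytes, so each emit is rounded down to an even length with len(buffer) & ~1 (clearing the low bit), any odd trailing byte is carried into the next chunk, and a final odd byte is zero-padded at end of stream. The same logic in isolation, as a runnable sketch:

    def align_chunks(chunks):
        buffer = bytearray()
        for chunk in chunks:
            buffer.extend(chunk)
            aligned = len(buffer) & ~1  # largest even length available
            if aligned:
                yield bytes(buffer[:aligned])
                buffer = buffer[aligned:]  # at most one leftover byte
        if buffer:
            if len(buffer) % 2:
                buffer.extend(b"\x00")  # pad the final sample
            yield bytes(buffer)

    print(list(align_chunks([b"abc", b"de", b"f"])))  # [b'ab', b'cd', b'ef']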