dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
pipecat/services/xtts/tts.py
CHANGED
@@ -4,12 +4,18 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""XTTS text-to-speech service implementation.
+
+This module provides integration with Coqui XTTS streaming server for
+text-to-speech synthesis using local Docker deployment.
+"""
+
 from typing import Any, AsyncGenerator, Dict, Optional
 
 import aiohttp
 from loguru import logger
 
-from pipecat.audio.utils import create_default_resampler
+from pipecat.audio.utils import create_stream_resampler
 from pipecat.frames.frames import (
     ErrorFrame,
     Frame,
@@ -31,6 +37,14 @@ from pipecat.utils.tracing.service_decorators import traced_tts
 
 
 def language_to_xtts_language(language: Language) -> Optional[str]:
+    """Convert a Language enum to XTTS language code.
+
+    Args:
+        language: The Language enum value to convert.
+
+    Returns:
+        The corresponding XTTS language code, or None if not supported.
+    """
     BASE_LANGUAGES = {
         Language.CS: "cs",
         Language.DE: "de",
@@ -70,6 +84,13 @@ def language_to_xtts_language(language: Language) -> Optional[str]:
 
 
 class XTTSService(TTSService):
+    """Coqui XTTS text-to-speech service.
+
+    Provides text-to-speech synthesis using a locally running Coqui XTTS
+    streaming server. Supports multiple languages and voice cloning through
+    studio speakers configuration.
+    """
+
     def __init__(
         self,
         *,
@@ -80,6 +101,16 @@ class XTTSService(TTSService):
         sample_rate: Optional[int] = None,
         **kwargs,
     ):
+        """Initialize the XTTS service.
+
+        Args:
+            voice_id: ID of the voice/speaker to use for synthesis.
+            base_url: Base URL of the XTTS streaming server.
+            aiohttp_session: HTTP session for making requests to the server.
+            language: Language for synthesis. Defaults to English.
+            sample_rate: Audio sample rate. If None, uses default.
+            **kwargs: Additional arguments passed to parent TTSService.
+        """
         super().__init__(sample_rate=sample_rate, **kwargs)
 
         self._settings = {
@@ -90,15 +121,33 @@ class XTTSService(TTSService):
         self._studio_speakers: Optional[Dict[str, Any]] = None
         self._aiohttp_session = aiohttp_session
 
-        self._resampler = create_default_resampler()
+        self._resampler = create_stream_resampler()
 
     def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics.
+
+        Returns:
+            True, as XTTS service supports metrics generation.
+        """
         return True
 
     def language_to_service_language(self, language: Language) -> Optional[str]:
+        """Convert a Language enum to XTTS service language format.
+
+        Args:
+            language: The language to convert.
+
+        Returns:
+            The XTTS-specific language code, or None if not supported.
+        """
         return language_to_xtts_language(language)
 
     async def start(self, frame: StartFrame):
+        """Start the XTTS service and load studio speakers.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
        await super().start(frame)
 
         if self._studio_speakers:
@@ -120,6 +169,14 @@ class XTTSService(TTSService):
 
     @traced_tts
     async def run_tts(self, text: str) -> AsyncGenerator[Frame, None]:
+        """Generate speech from text using XTTS streaming server.
+
+        Args:
+            text: The text to synthesize into speech.
+
+        Yields:
+            Frame: Audio frames containing the synthesized speech.
+        """
         logger.debug(f"{self}: Generating TTS [{text}]")
 
         if not self._studio_speakers:
pipecat/sync/base_notifier.py
CHANGED
@@ -4,14 +4,33 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Base notifier interface for Pipecat."""
+
 from abc import ABC, abstractmethod
 
 
 class BaseNotifier(ABC):
+    """Abstract base class for notification mechanisms.
+
+    Provides a standard interface for implementing notification and waiting
+    patterns used for event coordination and signaling between components
+    in the Pipecat framework.
+    """
+
     @abstractmethod
     async def notify(self):
+        """Send a notification signal.
+
+        Implementations should trigger any waiting coroutines or processes
+        that are blocked on this notifier.
+        """
         pass
 
     @abstractmethod
     async def wait(self):
+        """Wait for a notification signal.
+
+        Implementations should block until a notification is received
+        from the corresponding notify() call.
+        """
         pass
pipecat/sync/event_notifier.py
CHANGED
@@ -4,18 +4,42 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Event-based notifier implementation using asyncio Event primitives."""
+
 import asyncio
 
 from pipecat.sync.base_notifier import BaseNotifier
 
 
 class EventNotifier(BaseNotifier):
+    """Event-based notifier using asyncio.Event for task synchronization.
+
+    Provides a simple notification mechanism where one task can signal
+    an event and other tasks can wait for that event to occur. The event
+    is automatically cleared after each wait operation.
+    """
+
     def __init__(self):
+        """Initialize the event notifier.
+
+        Creates an internal asyncio.Event for managing notifications.
+        """
         self._event = asyncio.Event()
 
     async def notify(self):
+        """Signal the event to notify waiting tasks.
+
+        Sets the internal event, causing any tasks waiting on this
+        notifier to be awakened.
+        """
         self._event.set()
 
     async def wait(self):
+        """Wait for the event to be signaled.
+
+        Blocks until another task calls notify(). Automatically clears
+        the event after being awakened so subsequent calls will wait
+        for the next notification.
+        """
         await self._event.wait()
         self._event.clear()
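The notify/wait contract is easiest to see in isolation. A minimal sketch using only what this diff shows: EventNotifier with async notify() and wait(), and the event auto-clearing after each wait:

import asyncio

from pipecat.sync.event_notifier import EventNotifier


async def main():
    notifier = EventNotifier()

    async def waiter():
        await notifier.wait()  # blocks until notify() is called
        print("woke up")

    task = asyncio.create_task(waiter())
    await asyncio.sleep(0.1)  # let the waiter start blocking
    await notifier.notify()  # sets the event; wait() returns, then clears it
    await task


asyncio.run(main())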
pipecat/tests/utils.py
CHANGED
@@ -4,6 +4,8 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Testing utilities for Pipecat pipeline components."""
+
 import asyncio
 from dataclasses import dataclass
 from typing import Any, Awaitable, Callable, Dict, List, Optional, Sequence, Tuple
@@ -24,15 +26,27 @@ from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 
 @dataclass
 class SleepFrame(SystemFrame):
-    """
-
-    control
+    """A system frame that introduces a sleep delay in the test pipeline.
+
+    This frame is used by the test framework to control timing between
+    frame processing, allowing tests to separate system frames from
+    data or control frames.
+
+    Parameters:
+        sleep: Duration to sleep in seconds before processing the next frame.
     """
 
-    sleep: float = 0.
+    sleep: float = 0.2
 
 
 class HeartbeatsObserver(BaseObserver):
+    """Observer that monitors heartbeat frames from a specific processor.
+
+    This observer watches for HeartbeatFrames from a target processor and
+    invokes a callback when they are detected, useful for testing timing
+    and lifecycle events.
+    """
+
     def __init__(
         self,
         *,
@@ -40,11 +54,23 @@ class HeartbeatsObserver(BaseObserver):
         heartbeat_callback: Callable[[FrameProcessor, HeartbeatFrame], Awaitable[None]],
         **kwargs,
     ):
+        """Initialize the heartbeats observer.
+
+        Args:
+            target: The frame processor to monitor for heartbeat frames.
+            heartbeat_callback: Async callback function to invoke when heartbeats are detected.
+            **kwargs: Additional arguments passed to the parent observer.
+        """
         super().__init__(**kwargs)
         self._target = target
         self._callback = heartbeat_callback
 
     async def on_push_frame(self, data: FramePushed):
+        """Handle frame push events and detect heartbeats from target processor.
+
+        Args:
+            data: The frame push event data containing source and frame information.
+        """
         src = data.source
         frame = data.frame
 
@@ -53,6 +79,13 @@ class HeartbeatsObserver(BaseObserver):
 
 
 class QueuedFrameProcessor(FrameProcessor):
+    """A processor that captures frames in a queue for testing purposes.
+
+    This processor intercepts frames flowing in a specific direction and
+    stores them in a queue for later inspection during testing, while
+    still allowing the frames to continue through the pipeline.
+    """
+
     def __init__(
         self,
         *,
@@ -60,12 +93,25 @@ class QueuedFrameProcessor(FrameProcessor):
         queue_direction: FrameDirection,
         ignore_start: bool = True,
     ):
-        super().__init__()
+        """Initialize the queued frame processor.
+
+        Args:
+            queue: The asyncio queue to store captured frames.
+            queue_direction: The direction of frames to capture (UPSTREAM or DOWNSTREAM).
+            ignore_start: Whether to ignore StartFrames when capturing.
+        """
+        super().__init__(enable_direct_mode=True)
         self._queue = queue
         self._queue_direction = queue_direction
         self._ignore_start = ignore_start
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process frames and capture them in the queue if they match the direction.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction the frame is flowing.
+        """
         await super().process_frame(frame, direction)
 
         if direction == self._queue_direction:
@@ -85,6 +131,28 @@ async def run_test(
     start_metadata: Optional[Dict[str, Any]] = None,
     send_end_frame: bool = True,
 ) -> Tuple[Sequence[Frame], Sequence[Frame]]:
+    """Run a test pipeline with the specified processor and validate frame flow.
+
+    This function creates a test pipeline with the given processor, sends the
+    specified frames through it, and validates that the expected frames are
+    received in both upstream and downstream directions.
+
+    Args:
+        processor: The frame processor to test.
+        frames_to_send: Sequence of frames to send through the processor.
+        expected_down_frames: Expected frame types flowing downstream (optional).
+        expected_up_frames: Expected frame types flowing upstream (optional).
+        ignore_start: Whether to ignore StartFrames in frame validation.
+        observers: Optional list of observers to attach to the pipeline.
+        start_metadata: Optional metadata to include with the StartFrame.
+        send_end_frame: Whether to send an EndFrame at the end of the test.
+
+    Returns:
+        Tuple containing (downstream_frames, upstream_frames) that were received.
+
+    Raises:
+        AssertionError: If the received frames don't match the expected frame types.
+    """
     observers = observers or []
     start_metadata = start_metadata or {}
 
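Putting run_test together with the pieces above, a hedged sketch of a unit test. IdentityFilter (a passthrough processor also touched in this release) and the exact calling convention are assumptions inferred from the docstrings, not copied from pipecat's own test suite:

import asyncio

from pipecat.frames.frames import TextFrame
from pipecat.processors.filters.identity_filter import IdentityFilter
from pipecat.tests.utils import SleepFrame, run_test


async def main():
    # Per the docstring, expected frames are given as frame types and the
    # helper asserts the received frames match them in order; SleepFrame
    # only controls timing and is consumed by the test framework.
    down, up = await run_test(
        IdentityFilter(),
        frames_to_send=[TextFrame("hello"), SleepFrame(sleep=0.1)],
        expected_down_frames=[TextFrame],
    )
    print([type(f).__name__ for f in down])


asyncio.run(main())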
pipecat/transcriptions/language.py
CHANGED
@@ -4,13 +4,23 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Language code enumerations for Pipecat.
+
+This module provides comprehensive language code constants following ISO 639
+and BCP 47 standards, supporting both language-only and language-region
+combinations for various speech and text processing services.
+"""
+
 import sys
 from enum import Enum
 
 if sys.version_info < (3, 11):
 
     class StrEnum(str, Enum):
+        """String enumeration base class for Python < 3.11 compatibility."""
+
         def __new__(cls, value):
+            """Create a new instance of the StrEnum."""
            obj = str.__new__(cls, value)
             obj._value_ = value
             return obj
@@ -19,6 +29,14 @@ else:
 
 
 class Language(StrEnum):
+    """Language codes for speech and text processing services.
+
+    Provides comprehensive language code constants following ISO 639 and BCP 47
+    standards. Includes both language-only codes (e.g., 'en') and language-region
+    combinations (e.g., 'en-US') to support various speech synthesis, recognition,
+    and translation services.
+    """
+
     # Afrikaans
     AF = "af"
     AF_ZA = "af-ZA"
@@ -127,6 +145,9 @@ class Language(StrEnum):
     EN_US = "en-US"
     EN_ZA = "en-ZA"
 
+    # Esperanto
+    EO = "eo"
+
     # Spanish
     ES = "es"
     ES_AR = "es-AR"
@@ -456,6 +477,9 @@ class Language(StrEnum):
     # Tatar
     TT = "tt"
 
+    # Uyghur
+    UG = "ug"
+
     # Ukrainian
     UK = "uk"
     UK_UA = "uk-UA"
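Because Language is a StrEnum (with the pre-3.11 shim defined above), members compare and hash like their plain string codes, which is how new entries such as Esperanto and Uyghur slot into service language maps. A small illustrative sketch:

from pipecat.transcriptions.language import Language

# New in this release: Esperanto and Uyghur.
assert Language.EO == "eo"
assert Language.UG == "ug"

# Members behave like strings, so they key directly into plain code tables.
names = {"eo": "Esperanto", "ug": "Uyghur"}
print(names[Language.UG])  # -> Uyghur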
pipecat/transports/base_input.py
CHANGED
@@ -4,6 +4,12 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Base input transport implementation for Pipecat.
+
+This module provides the BaseInputTransport class which handles audio and video
+input processing, including VAD, turn analysis, and interruption management.
+"""
+
 import asyncio
 from concurrent.futures import ThreadPoolExecutor
 from typing import Optional
@@ -28,6 +34,7 @@ from pipecat.frames.frames import (
     InputAudioRawFrame,
     InputImageRawFrame,
     MetricsFrame,
+    SpeechControlParamsFrame,
     StartFrame,
     StartInterruptionFrame,
     StopFrame,
@@ -47,7 +54,20 @@ AUDIO_INPUT_TIMEOUT_SECS = 0.5
 
 
 class BaseInputTransport(FrameProcessor):
+    """Base class for input transport implementations.
+
+    Handles audio and video input processing including Voice Activity Detection,
+    turn analysis, audio filtering, and user interaction management. Supports
+    interruption handling and provides hooks for transport-specific implementations.
+    """
+
     def __init__(self, params: TransportParams, **kwargs):
+        """Initialize the base input transport.
+
+        Args:
+            params: Transport configuration parameters.
+            **kwargs: Additional arguments passed to parent class.
+        """
         super().__init__(**kwargs)
 
         self._params = params
@@ -115,25 +135,54 @@ class BaseInputTransport(FrameProcessor):
             self._params.video_out_color_format = self._params.camera_out_color_format
 
     def enable_audio_in_stream_on_start(self, enabled: bool) -> None:
+        """Enable or disable audio streaming on transport start.
+
+        Args:
+            enabled: Whether to start audio streaming immediately on transport start.
+        """
         self.logger.debug(f"Enabling audio on start. {enabled}")
         self._params.audio_in_stream_on_start = enabled
 
     async def start_audio_in_streaming(self):
+        """Start audio input streaming.
+
+        Override in subclasses to implement transport-specific audio streaming.
+        """
         pass
 
     @property
     def sample_rate(self) -> int:
+        """Get the current audio sample rate.
+
+        Returns:
+            The sample rate in Hz.
+        """
         return self._sample_rate
 
     @property
     def vad_analyzer(self) -> Optional[VADAnalyzer]:
+        """Get the Voice Activity Detection analyzer.
+
+        Returns:
+            The VAD analyzer instance if configured, None otherwise.
+        """
         return self._params.vad_analyzer
 
     @property
     def turn_analyzer(self) -> Optional[BaseTurnAnalyzer]:
+        """Get the turn-taking analyzer.
+
+        Returns:
+            The turn analyzer instance if configured, None otherwise.
+        """
         return self._params.turn_analyzer
 
     async def start(self, frame: StartFrame):
+        """Start the input transport and initialize components.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         self._paused = False
         self._user_speaking = False
 
@@ -147,11 +196,23 @@ class BaseInputTransport(FrameProcessor):
         if self._params.turn_analyzer:
             self._params.turn_analyzer.set_sample_rate(self._sample_rate)
 
+        if self._params.vad_analyzer or self._params.turn_analyzer:
+            vad_params = self._params.vad_analyzer.params if self._params.vad_analyzer else None
+            turn_params = self._params.turn_analyzer.params if self._params.turn_analyzer else None
+
+            speech_frame = SpeechControlParamsFrame(vad_params=vad_params, turn_params=turn_params)
+            await self.push_frame(speech_frame)
+
         # Start audio filter.
         if self._params.audio_in_filter:
             await self._params.audio_in_filter.start(self._sample_rate)
 
     async def stop(self, frame: EndFrame):
+        """Stop the input transport and cleanup resources.
+
+        Args:
+            frame: The end frame signaling transport shutdown.
+        """
         # Cancel and wait for the audio input task to finish.
         await self._cancel_audio_task()
         # Stop audio filter.
@@ -159,6 +220,11 @@ class BaseInputTransport(FrameProcessor):
             await self._params.audio_in_filter.stop()
 
     async def pause(self, frame: StopFrame):
+        """Pause the input transport temporarily.
+
+        Args:
+            frame: The stop frame signaling transport pause.
+        """
         self._paused = True
         # Cancel task so we clear the queue
         await self._cancel_audio_task()
@@ -166,19 +232,38 @@ class BaseInputTransport(FrameProcessor):
         self._create_audio_task()
 
     async def cancel(self, frame: CancelFrame):
+        """Cancel the input transport and stop all processing.
+
+        Args:
+            frame: The cancel frame signaling immediate cancellation.
+        """
         # Cancel and wait for the audio input task to finish.
         await self._cancel_audio_task()
 
     async def set_transport_ready(self, frame: StartFrame):
-        """
+        """Called when the transport is ready to stream.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         # Create audio input queue and task if needed.
         self._create_audio_task()
 
     async def push_video_frame(self, frame: InputImageRawFrame):
+        """Push a video frame downstream if video input is enabled.
+
+        Args:
+            frame: The input video frame to process.
+        """
         if self._params.video_in_enabled and not self._paused:
             await self.push_frame(frame)
 
     async def push_audio_frame(self, frame: InputAudioRawFrame):
+        """Push an audio frame to the processing queue if audio input is enabled.
+
+        Args:
+            frame: The input audio frame to process.
+        """
         if self._params.audio_in_enabled and not self._paused:
             await self._audio_in_queue.put(frame)
 
@@ -187,6 +272,12 @@ class BaseInputTransport(FrameProcessor):
     #
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process incoming frames and handle transport-specific logic.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
         await super().process_frame(frame, direction)
 
         # Specific system frames
@@ -216,6 +307,13 @@ class BaseInputTransport(FrameProcessor):
         elif isinstance(frame, VADParamsUpdateFrame):
             if self.vad_analyzer:
                 self.vad_analyzer.set_params(frame.params, bot_logger=self.logger)
+                speech_frame = SpeechControlParamsFrame(
+                    vad_params=frame.params,
+                    turn_params=self._params.turn_analyzer.params
+                    if self._params.turn_analyzer
+                    else None,
+                )
+                await self.push_frame(speech_frame)
         elif isinstance(frame, SystemFrame):
             await self.push_frame(frame, direction)
         # Control frames
@@ -238,12 +336,14 @@ class BaseInputTransport(FrameProcessor):
     #
 
     async def _handle_bot_interruption(self, frame: BotInterruptionFrame):
+        """Handle bot interruption frames."""
         self.logger.debug("Bot interruption")
         if self.interruptions_allowed:
             await self._start_interruption()
             await self.push_frame(StartInterruptionFrame())
 
     async def _handle_user_interruption(self, frame: Frame):
+        """Handle user interruption events based on speaking state."""
         if isinstance(frame, UserStartedSpeakingFrame):
             self.logger.debug("User started speaking")
             self._user_speaking = True
@@ -281,9 +381,11 @@ class BaseInputTransport(FrameProcessor):
     #
 
     async def _handle_bot_started_speaking(self, frame: BotStartedSpeakingFrame):
+        """Update bot speaking state when bot starts speaking."""
         self._bot_speaking = True
 
     async def _handle_bot_stopped_speaking(self, frame: BotStoppedSpeakingFrame):
+        """Update bot speaking state when bot stops speaking."""
         self._bot_speaking = False
 
     #
@@ -291,16 +393,19 @@ class BaseInputTransport(FrameProcessor):
     #
 
     def _create_audio_task(self):
+        """Create the audio processing task if audio input is enabled."""
         if not self._audio_task and self._params.audio_in_enabled:
             self._audio_in_queue = asyncio.Queue()
             self._audio_task = self.create_task(self._audio_task_handler())
 
     async def _cancel_audio_task(self):
+        """Cancel and cleanup the audio processing task."""
         if self._audio_task:
             await self.cancel_task(self._audio_task)
             self._audio_task = None
 
     async def _vad_analyze(self, audio_frame: InputAudioRawFrame) -> VADState:
+        """Analyze audio frame for voice activity."""
         state = VADState.QUIET
         if self.vad_analyzer:
             state = await self.get_event_loop().run_in_executor(
@@ -309,6 +414,7 @@ class BaseInputTransport(FrameProcessor):
         return state
 
     async def _handle_vad(self, audio_frame: InputAudioRawFrame, vad_state: VADState):
+        """Handle Voice Activity Detection results and generate appropriate frames."""
         new_vad_state = await self._vad_analyze(audio_frame)
         if (
             new_vad_state != vad_state
@@ -339,18 +445,21 @@ class BaseInputTransport(FrameProcessor):
         return vad_state
 
     async def _handle_end_of_turn(self):
+        """Handle end-of-turn analysis and generate prediction results."""
         if self.turn_analyzer:
             state, prediction = await self.turn_analyzer.analyze_end_of_turn()
             await self._handle_prediction_result(prediction)
             await self._handle_end_of_turn_complete(state)
 
     async def _handle_end_of_turn_complete(self, state: EndOfTurnState):
+        """Handle completion of end-of-turn analysis."""
         if state == EndOfTurnState.COMPLETE:
             await self._handle_user_interruption(UserStoppedSpeakingFrame())
 
     async def _run_turn_analyzer(
         self, frame: InputAudioRawFrame, vad_state: VADState, previous_vad_state: VADState
     ):
+        """Run turn analysis on audio frame and handle results."""
         is_speech = vad_state == VADState.SPEAKING or vad_state == VADState.STARTING
         # If silence exceeds threshold, we are going to receive EndOfTurnState.COMPLETE
         end_of_turn_state = self._params.turn_analyzer.append_audio(frame.audio, is_speech)
@@ -361,6 +470,7 @@ class BaseInputTransport(FrameProcessor):
         await self._handle_end_of_turn()
 
     async def _audio_task_handler(self):
+        """Main audio processing task handler for VAD and turn analysis."""
         vad_state: VADState = VADState.QUIET
         while True:
             try:
@@ -395,13 +505,7 @@ class BaseInputTransport(FrameProcessor):
                 if self._params.turn_analyzer:
                     self._params.turn_analyzer.clear()
                 await self._handle_user_interruption(UserStoppedSpeakingFrame())
-            finally:
-                self.reset_watchdog()
 
     async def _handle_prediction_result(self, result: MetricsData):
-        """Handle a prediction result event from the turn analyzer.
-
-        Args:
-            result: The prediction result MetricsData.
-        """
+        """Handle a prediction result event from the turn analyzer."""
         await self.push_frame(MetricsFrame(data=[result]))
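The new SpeechControlParamsFrame gives downstream processors a way to learn the transport's current VAD and turn-analyzer settings without reaching into the transport itself. A minimal consumer sketch, assuming only the frame fields visible in this diff (vad_params, turn_params):

from pipecat.frames.frames import Frame, SpeechControlParamsFrame
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor


class SpeechParamsLogger(FrameProcessor):
    """Logs the speech control parameters announced by the input transport."""

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        await super().process_frame(frame, direction)
        if isinstance(frame, SpeechControlParamsFrame):
            # Emitted on start() and again whenever a VADParamsUpdateFrame
            # changes the analyzer settings (see the hunks above).
            print(f"VAD: {frame.vad_params} turn: {frame.turn_params}")
        await self.push_frame(frame, direction)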
|