dv-pipecat-ai 0.0.74.dev770__py3-none-any.whl → 0.0.82.dev776__py3-none-any.whl
This diff compares publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Potentially problematic release: this version of dv-pipecat-ai might be problematic.
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/METADATA +137 -93
- dv_pipecat_ai-0.0.82.dev776.dist-info/RECORD +340 -0
- pipecat/__init__.py +17 -0
- pipecat/adapters/base_llm_adapter.py +36 -1
- pipecat/adapters/schemas/direct_function.py +296 -0
- pipecat/adapters/schemas/function_schema.py +15 -6
- pipecat/adapters/schemas/tools_schema.py +55 -7
- pipecat/adapters/services/anthropic_adapter.py +22 -3
- pipecat/adapters/services/aws_nova_sonic_adapter.py +23 -3
- pipecat/adapters/services/bedrock_adapter.py +22 -3
- pipecat/adapters/services/gemini_adapter.py +16 -3
- pipecat/adapters/services/open_ai_adapter.py +17 -2
- pipecat/adapters/services/open_ai_realtime_adapter.py +23 -3
- pipecat/audio/filters/base_audio_filter.py +30 -6
- pipecat/audio/filters/koala_filter.py +37 -2
- pipecat/audio/filters/krisp_filter.py +59 -6
- pipecat/audio/filters/noisereduce_filter.py +37 -0
- pipecat/audio/interruptions/base_interruption_strategy.py +25 -5
- pipecat/audio/interruptions/min_words_interruption_strategy.py +21 -4
- pipecat/audio/mixers/base_audio_mixer.py +30 -7
- pipecat/audio/mixers/soundfile_mixer.py +53 -6
- pipecat/audio/resamplers/base_audio_resampler.py +17 -9
- pipecat/audio/resamplers/resampy_resampler.py +26 -1
- pipecat/audio/resamplers/soxr_resampler.py +32 -1
- pipecat/audio/resamplers/soxr_stream_resampler.py +101 -0
- pipecat/audio/utils.py +194 -1
- pipecat/audio/vad/silero.py +60 -3
- pipecat/audio/vad/vad_analyzer.py +114 -30
- pipecat/clocks/base_clock.py +19 -0
- pipecat/clocks/system_clock.py +25 -0
- pipecat/extensions/voicemail/__init__.py +0 -0
- pipecat/extensions/voicemail/voicemail_detector.py +707 -0
- pipecat/frames/frames.py +590 -156
- pipecat/metrics/metrics.py +64 -1
- pipecat/observers/base_observer.py +58 -19
- pipecat/observers/loggers/debug_log_observer.py +56 -64
- pipecat/observers/loggers/llm_log_observer.py +8 -1
- pipecat/observers/loggers/transcription_log_observer.py +19 -7
- pipecat/observers/loggers/user_bot_latency_log_observer.py +32 -5
- pipecat/observers/turn_tracking_observer.py +26 -1
- pipecat/pipeline/base_pipeline.py +5 -7
- pipecat/pipeline/base_task.py +52 -9
- pipecat/pipeline/parallel_pipeline.py +121 -177
- pipecat/pipeline/pipeline.py +129 -20
- pipecat/pipeline/runner.py +50 -1
- pipecat/pipeline/sync_parallel_pipeline.py +132 -32
- pipecat/pipeline/task.py +263 -280
- pipecat/pipeline/task_observer.py +85 -34
- pipecat/pipeline/to_be_updated/merge_pipeline.py +32 -2
- pipecat/processors/aggregators/dtmf_aggregator.py +29 -22
- pipecat/processors/aggregators/gated.py +25 -24
- pipecat/processors/aggregators/gated_openai_llm_context.py +22 -2
- pipecat/processors/aggregators/llm_response.py +398 -89
- pipecat/processors/aggregators/openai_llm_context.py +161 -13
- pipecat/processors/aggregators/sentence.py +25 -14
- pipecat/processors/aggregators/user_response.py +28 -3
- pipecat/processors/aggregators/vision_image_frame.py +24 -14
- pipecat/processors/async_generator.py +28 -0
- pipecat/processors/audio/audio_buffer_processor.py +78 -37
- pipecat/processors/consumer_processor.py +25 -6
- pipecat/processors/filters/frame_filter.py +23 -0
- pipecat/processors/filters/function_filter.py +30 -0
- pipecat/processors/filters/identity_filter.py +17 -2
- pipecat/processors/filters/null_filter.py +24 -1
- pipecat/processors/filters/stt_mute_filter.py +56 -21
- pipecat/processors/filters/wake_check_filter.py +46 -3
- pipecat/processors/filters/wake_notifier_filter.py +21 -3
- pipecat/processors/frame_processor.py +488 -131
- pipecat/processors/frameworks/langchain.py +38 -3
- pipecat/processors/frameworks/rtvi.py +719 -34
- pipecat/processors/gstreamer/pipeline_source.py +41 -0
- pipecat/processors/idle_frame_processor.py +26 -3
- pipecat/processors/logger.py +23 -0
- pipecat/processors/metrics/frame_processor_metrics.py +77 -4
- pipecat/processors/metrics/sentry.py +42 -4
- pipecat/processors/producer_processor.py +34 -14
- pipecat/processors/text_transformer.py +22 -10
- pipecat/processors/transcript_processor.py +48 -29
- pipecat/processors/user_idle_processor.py +31 -21
- pipecat/runner/__init__.py +1 -0
- pipecat/runner/daily.py +132 -0
- pipecat/runner/livekit.py +148 -0
- pipecat/runner/run.py +543 -0
- pipecat/runner/types.py +67 -0
- pipecat/runner/utils.py +515 -0
- pipecat/serializers/base_serializer.py +42 -0
- pipecat/serializers/exotel.py +17 -6
- pipecat/serializers/genesys.py +95 -0
- pipecat/serializers/livekit.py +33 -0
- pipecat/serializers/plivo.py +16 -15
- pipecat/serializers/protobuf.py +37 -1
- pipecat/serializers/telnyx.py +18 -17
- pipecat/serializers/twilio.py +32 -16
- pipecat/services/ai_service.py +5 -3
- pipecat/services/anthropic/llm.py +113 -43
- pipecat/services/assemblyai/models.py +63 -5
- pipecat/services/assemblyai/stt.py +64 -11
- pipecat/services/asyncai/__init__.py +0 -0
- pipecat/services/asyncai/tts.py +501 -0
- pipecat/services/aws/llm.py +185 -111
- pipecat/services/aws/stt.py +217 -23
- pipecat/services/aws/tts.py +118 -52
- pipecat/services/aws/utils.py +101 -5
- pipecat/services/aws_nova_sonic/aws.py +82 -64
- pipecat/services/aws_nova_sonic/context.py +15 -6
- pipecat/services/azure/common.py +10 -2
- pipecat/services/azure/image.py +32 -0
- pipecat/services/azure/llm.py +9 -7
- pipecat/services/azure/stt.py +65 -2
- pipecat/services/azure/tts.py +154 -23
- pipecat/services/cartesia/stt.py +125 -8
- pipecat/services/cartesia/tts.py +102 -38
- pipecat/services/cerebras/llm.py +15 -23
- pipecat/services/deepgram/stt.py +19 -11
- pipecat/services/deepgram/tts.py +36 -0
- pipecat/services/deepseek/llm.py +14 -23
- pipecat/services/elevenlabs/tts.py +330 -64
- pipecat/services/fal/image.py +43 -0
- pipecat/services/fal/stt.py +48 -10
- pipecat/services/fireworks/llm.py +14 -21
- pipecat/services/fish/tts.py +109 -9
- pipecat/services/gemini_multimodal_live/__init__.py +1 -0
- pipecat/services/gemini_multimodal_live/events.py +83 -2
- pipecat/services/gemini_multimodal_live/file_api.py +189 -0
- pipecat/services/gemini_multimodal_live/gemini.py +218 -21
- pipecat/services/gladia/config.py +17 -10
- pipecat/services/gladia/stt.py +82 -36
- pipecat/services/google/frames.py +40 -0
- pipecat/services/google/google.py +2 -0
- pipecat/services/google/image.py +39 -2
- pipecat/services/google/llm.py +176 -58
- pipecat/services/google/llm_openai.py +26 -4
- pipecat/services/google/llm_vertex.py +37 -15
- pipecat/services/google/rtvi.py +41 -0
- pipecat/services/google/stt.py +65 -17
- pipecat/services/google/test-google-chirp.py +45 -0
- pipecat/services/google/tts.py +390 -19
- pipecat/services/grok/llm.py +8 -6
- pipecat/services/groq/llm.py +8 -6
- pipecat/services/groq/stt.py +13 -9
- pipecat/services/groq/tts.py +40 -0
- pipecat/services/hamsa/__init__.py +9 -0
- pipecat/services/hamsa/stt.py +241 -0
- pipecat/services/heygen/__init__.py +5 -0
- pipecat/services/heygen/api.py +281 -0
- pipecat/services/heygen/client.py +620 -0
- pipecat/services/heygen/video.py +338 -0
- pipecat/services/image_service.py +5 -3
- pipecat/services/inworld/__init__.py +1 -0
- pipecat/services/inworld/tts.py +592 -0
- pipecat/services/llm_service.py +127 -45
- pipecat/services/lmnt/tts.py +80 -7
- pipecat/services/mcp_service.py +85 -44
- pipecat/services/mem0/memory.py +42 -13
- pipecat/services/minimax/tts.py +74 -15
- pipecat/services/mistral/__init__.py +0 -0
- pipecat/services/mistral/llm.py +185 -0
- pipecat/services/moondream/vision.py +55 -10
- pipecat/services/neuphonic/tts.py +275 -48
- pipecat/services/nim/llm.py +8 -6
- pipecat/services/ollama/llm.py +27 -7
- pipecat/services/openai/base_llm.py +54 -16
- pipecat/services/openai/image.py +30 -0
- pipecat/services/openai/llm.py +7 -5
- pipecat/services/openai/stt.py +13 -9
- pipecat/services/openai/tts.py +42 -10
- pipecat/services/openai_realtime_beta/azure.py +11 -9
- pipecat/services/openai_realtime_beta/context.py +7 -5
- pipecat/services/openai_realtime_beta/events.py +10 -7
- pipecat/services/openai_realtime_beta/openai.py +37 -18
- pipecat/services/openpipe/llm.py +30 -24
- pipecat/services/openrouter/llm.py +9 -7
- pipecat/services/perplexity/llm.py +15 -19
- pipecat/services/piper/tts.py +26 -12
- pipecat/services/playht/tts.py +227 -65
- pipecat/services/qwen/llm.py +8 -6
- pipecat/services/rime/tts.py +128 -17
- pipecat/services/riva/stt.py +160 -22
- pipecat/services/riva/tts.py +67 -2
- pipecat/services/sambanova/llm.py +19 -17
- pipecat/services/sambanova/stt.py +14 -8
- pipecat/services/sarvam/tts.py +60 -13
- pipecat/services/simli/video.py +82 -21
- pipecat/services/soniox/__init__.py +0 -0
- pipecat/services/soniox/stt.py +398 -0
- pipecat/services/speechmatics/stt.py +29 -17
- pipecat/services/stt_service.py +47 -11
- pipecat/services/tavus/video.py +94 -25
- pipecat/services/together/llm.py +8 -6
- pipecat/services/tts_service.py +77 -53
- pipecat/services/ultravox/stt.py +46 -43
- pipecat/services/vision_service.py +5 -3
- pipecat/services/websocket_service.py +12 -11
- pipecat/services/whisper/base_stt.py +58 -12
- pipecat/services/whisper/stt.py +69 -58
- pipecat/services/xtts/tts.py +59 -2
- pipecat/sync/base_notifier.py +19 -0
- pipecat/sync/event_notifier.py +24 -0
- pipecat/tests/utils.py +73 -5
- pipecat/transcriptions/language.py +24 -0
- pipecat/transports/base_input.py +112 -8
- pipecat/transports/base_output.py +235 -13
- pipecat/transports/base_transport.py +119 -0
- pipecat/transports/local/audio.py +76 -0
- pipecat/transports/local/tk.py +84 -0
- pipecat/transports/network/fastapi_websocket.py +174 -15
- pipecat/transports/network/small_webrtc.py +383 -39
- pipecat/transports/network/webrtc_connection.py +214 -8
- pipecat/transports/network/websocket_client.py +171 -1
- pipecat/transports/network/websocket_server.py +147 -9
- pipecat/transports/services/daily.py +792 -70
- pipecat/transports/services/helpers/daily_rest.py +122 -129
- pipecat/transports/services/livekit.py +339 -4
- pipecat/transports/services/tavus.py +273 -38
- pipecat/utils/asyncio/task_manager.py +92 -186
- pipecat/utils/base_object.py +83 -1
- pipecat/utils/network.py +2 -0
- pipecat/utils/string.py +114 -58
- pipecat/utils/text/base_text_aggregator.py +44 -13
- pipecat/utils/text/base_text_filter.py +46 -0
- pipecat/utils/text/markdown_text_filter.py +70 -14
- pipecat/utils/text/pattern_pair_aggregator.py +18 -14
- pipecat/utils/text/simple_text_aggregator.py +43 -2
- pipecat/utils/text/skip_tags_aggregator.py +21 -13
- pipecat/utils/time.py +36 -0
- pipecat/utils/tracing/class_decorators.py +32 -7
- pipecat/utils/tracing/conversation_context_provider.py +12 -2
- pipecat/utils/tracing/service_attributes.py +80 -64
- pipecat/utils/tracing/service_decorators.py +48 -21
- pipecat/utils/tracing/setup.py +13 -7
- pipecat/utils/tracing/turn_context_provider.py +12 -2
- pipecat/utils/tracing/turn_trace_observer.py +27 -0
- pipecat/utils/utils.py +14 -14
- dv_pipecat_ai-0.0.74.dev770.dist-info/RECORD +0 -319
- pipecat/examples/daily_runner.py +0 -64
- pipecat/examples/run.py +0 -265
- pipecat/utils/asyncio/watchdog_async_iterator.py +0 -72
- pipecat/utils/asyncio/watchdog_event.py +0 -42
- pipecat/utils/asyncio/watchdog_priority_queue.py +0 -48
- pipecat/utils/asyncio/watchdog_queue.py +0 -48
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.74.dev770.dist-info → dv_pipecat_ai-0.0.82.dev776.dist-info}/top_level.txt +0 -0
- /pipecat/{examples → extensions}/__init__.py +0 -0
The rest of the page is the per-file diff for pipecat/transports/base_output.py (+235 -13), reconstructed below as a unified diff. Removed lines whose tails the diff viewer elided are left truncated as shown.

```diff
--- a/pipecat/transports/base_output.py
+++ b/pipecat/transports/base_output.py
@@ -4,9 +4,14 @@
 # SPDX-License-Identifier: BSD 2-Clause License
 #
 
+"""Base output transport implementation for Pipecat.
+
+This module provides the BaseOutputTransport class which handles audio and video
+output processing, including frame buffering, mixing, timing, and media streaming.
+"""
+
 import asyncio
 import itertools
-import sys
 import time
 from concurrent.futures import ThreadPoolExecutor
 from typing import Any, AsyncGenerator, Dict, List, Mapping, Optional
@@ -15,7 +20,7 @@ from loguru import logger
 from PIL import Image
 
 from pipecat.audio.mixers.base_audio_mixer import BaseAudioMixer
-from pipecat.audio.utils import
+from pipecat.audio.utils import create_stream_resampler, is_silence
 from pipecat.frames.frames import (
     BotSpeakingFrame,
     BotStartedSpeakingFrame,
@@ -28,6 +33,8 @@ from pipecat.frames.frames import (
     OutputDTMFFrame,
     OutputDTMFUrgentFrame,
     OutputImageRawFrame,
+    OutputTransportReadyFrame,
+    SpeechOutputAudioRawFrame,
     SpriteFrame,
     StartFrame,
     StartInterruptionFrame,
@@ -39,7 +46,6 @@ from pipecat.frames.frames import (
 )
 from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
 from pipecat.transports.base_transport import TransportParams
-from pipecat.utils.asyncio.watchdog_priority_queue import WatchdogPriorityQueue
 from pipecat.utils.time import nanoseconds_to_seconds
 
 # TODO: When we use GeminiMultimodalLiveLLMService, we need to change this to 0.35 but that creates issue for faster TTS.
@@ -47,7 +53,20 @@ BOT_VAD_STOP_SECS = 0.30
 
 
 class BaseOutputTransport(FrameProcessor):
+    """Base class for output transport implementations.
+
+    Handles audio and video output processing including frame buffering, audio mixing,
+    timing coordination, and media streaming. Supports multiple output destinations
+    and provides interruption handling for real-time communication.
+    """
+
     def __init__(self, params: TransportParams, **kwargs):
+        """Initialize the base output transport.
+
+        Args:
+            params: Transport configuration parameters.
+            **kwargs: Additional arguments passed to parent class.
+        """
         super().__init__(**kwargs)
 
         self._params = params
@@ -68,13 +87,28 @@ class BaseOutputTransport(FrameProcessor):
 
     @property
     def sample_rate(self) -> int:
+        """Get the current audio sample rate.
+
+        Returns:
+            The sample rate in Hz.
+        """
         return self._sample_rate
 
     @property
     def audio_chunk_size(self) -> int:
+        """Get the audio chunk size for output processing.
+
+        Returns:
+            The size of audio chunks in bytes.
+        """
         return self._audio_chunk_size
 
     async def start(self, frame: StartFrame):
+        """Start the output transport and initialize components.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         self._sample_rate = self._params.audio_out_sample_rate or frame.audio_out_sample_rate
 
         # We will write 10ms*CHUNKS of audio at a time (where CHUNKS is the
@@ -84,15 +118,29 @@ class BaseOutputTransport(FrameProcessor):
         self._audio_chunk_size = audio_bytes_10ms * self._params.audio_out_10ms_chunks
 
     async def stop(self, frame: EndFrame):
+        """Stop the output transport and cleanup resources.
+
+        Args:
+            frame: The end frame signaling transport shutdown.
+        """
         for _, sender in self._media_senders.items():
             await sender.stop(frame)
 
     async def cancel(self, frame: CancelFrame):
+        """Cancel the output transport and stop all processing.
+
+        Args:
+            frame: The cancel frame signaling immediate cancellation.
+        """
         for _, sender in self._media_senders.items():
             await sender.cancel(frame)
 
     async def set_transport_ready(self, frame: StartFrame):
-        """
+        """Called when the transport is ready to stream.
+
+        Args:
+            frame: The start frame containing initialization parameters.
+        """
         # Register destinations.
         for destination in self._params.audio_out_destinations:
             await self.register_audio_destination(destination)
```
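A note on the chunk sizing in the `start()` hunk above: the transport writes `audio_out_10ms_chunks` blocks of 10 ms audio per call, so `audio_chunk_size = audio_bytes_10ms * audio_out_10ms_chunks`. A minimal worked example of that arithmetic, assuming 16 kHz mono 16-bit PCM and four 10 ms chunks (the real values come from `TransportParams` and the `StartFrame`):

```python
SAMPLE_RATE = 16_000   # Hz (assumed for illustration)
NUM_CHANNELS = 1       # mono (assumed)
BYTES_PER_SAMPLE = 2   # 16-bit PCM (assumed)

# 10 ms of audio is sample_rate / 100 samples.
audio_bytes_10ms = (SAMPLE_RATE // 100) * NUM_CHANNELS * BYTES_PER_SAMPLE
assert audio_bytes_10ms == 320  # 160 samples * 2 bytes

audio_out_10ms_chunks = 4  # assumed configuration value
audio_chunk_size = audio_bytes_10ms * audio_out_10ms_chunks
assert audio_chunk_size == 1280  # i.e. the transport writes 40 ms at a time
```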
```diff
@@ -127,28 +175,71 @@ class BaseOutputTransport(FrameProcessor):
         )
         await self._media_senders[destination].start(frame)
 
+        # Sending a frame indicating that the output transport is ready and able to receive frames.
+        await self.push_frame(OutputTransportReadyFrame(), FrameDirection.UPSTREAM)
+
     async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
+        """Send a transport message.
+
+        Args:
+            frame: The transport message frame to send.
+        """
         pass
 
     async def register_video_destination(self, destination: str):
+        """Register a video output destination.
+
+        Args:
+            destination: The destination identifier to register.
+        """
         pass
 
     async def register_audio_destination(self, destination: str):
+        """Register an audio output destination.
+
+        Args:
+            destination: The destination identifier to register.
+        """
         pass
 
     async def write_video_frame(self, frame: OutputImageRawFrame):
+        """Write a video frame to the transport.
+
+        Args:
+            frame: The output video frame to write.
+        """
         pass
 
     async def write_audio_frame(self, frame: OutputAudioRawFrame):
+        """Write an audio frame to the transport.
+
+        Args:
+            frame: The output audio frame to write.
+        """
         pass
 
     async def write_dtmf(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame):
+        """Write a DTMF tone to the transport.
+
+        Args:
+            frame: The DTMF frame to write.
+        """
         pass
 
     async def send_audio(self, frame: OutputAudioRawFrame):
+        """Send an audio frame downstream.
+
+        Args:
+            frame: The audio frame to send.
+        """
         await self.queue_frame(frame, FrameDirection.DOWNSTREAM)
 
     async def send_image(self, frame: OutputImageRawFrame | SpriteFrame):
+        """Send an image frame downstream.
+
+        Args:
+            frame: The image frame to send.
+        """
         await self.queue_frame(frame, FrameDirection.DOWNSTREAM)
 
     #
@@ -156,6 +247,12 @@ class BaseOutputTransport(FrameProcessor):
     #
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
+        """Process incoming frames and handle transport-specific logic.
+
+        Args:
+            frame: The frame to process.
+            direction: The direction of frame flow in the pipeline.
+        """
         await super().process_frame(frame, direction)
 
         #
@@ -201,6 +298,7 @@ class BaseOutputTransport(FrameProcessor):
         await self._handle_frame(frame)
 
     async def _handle_frame(self, frame: Frame):
+        """Handle frames by routing them to appropriate media senders."""
         if frame.transport_destination not in self._media_senders:
             logger.warning(
                 f"{self} destination [{frame.transport_destination}] not registered for frame {frame}"
@@ -227,6 +325,12 @@ class BaseOutputTransport(FrameProcessor):
     #
 
     class MediaSender:
+        """Handles media streaming for a specific destination.
+
+        Manages audio and video output processing including buffering, timing,
+        mixing, and frame delivery for a single output destination.
+        """
+
         def __init__(
             self,
             transport: "BaseOutputTransport",
@@ -236,6 +340,15 @@ class BaseOutputTransport(FrameProcessor):
             audio_chunk_size: int,
             params: TransportParams,
         ):
+            """Initialize the media sender.
+
+            Args:
+                transport: The parent transport instance.
+                destination: The destination identifier for this sender.
+                sample_rate: The audio sample rate in Hz.
+                audio_chunk_size: The size of audio chunks in bytes.
+                params: Transport configuration parameters.
+            """
             self._transport = transport
             self._destination = destination
             self._sample_rate = sample_rate
@@ -249,7 +362,7 @@ class BaseOutputTransport(FrameProcessor):
             self._audio_buffer = bytearray()
 
             # This will be used to resample incoming audio to the output sample rate.
-            self._resampler =
+            self._resampler = create_stream_resampler()
 
             # The user can provide a single mixer, to be used by the default
             # destination, or a destination/mixer mapping.
@@ -267,13 +380,28 @@ class BaseOutputTransport(FrameProcessor):
 
         @property
         def sample_rate(self) -> int:
+            """Get the audio sample rate.
+
+            Returns:
+                The sample rate in Hz.
+            """
             return self._sample_rate
 
         @property
         def audio_chunk_size(self) -> int:
+            """Get the audio chunk size.
+
+            Returns:
+                The size of audio chunks in bytes.
+            """
             return self._audio_chunk_size
 
         async def start(self, frame: StartFrame):
+            """Start the media sender and initialize components.
+
+            Args:
+                frame: The start frame containing initialization parameters.
+            """
             self._audio_buffer = bytearray()
 
             # Create all tasks.
@@ -294,8 +422,13 @@ class BaseOutputTransport(FrameProcessor):
                 await self._mixer.start(self._sample_rate)
 
         async def stop(self, frame: EndFrame):
+            """Stop the media sender and cleanup resources.
+
+            Args:
+                frame: The end frame signaling sender shutdown.
+            """
             # Let the sink tasks process the queue until they reach this EndFrame.
-            await self._clock_queue.put((
+            await self._clock_queue.put((float("inf"), frame.id, frame))
             await self._audio_queue.put(frame)
 
             # At this point we have enqueued an EndFrame and we need to wait for
```
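In the `stop()` hunk above, the `EndFrame` sentinel is enqueued with a priority of `float("inf")`, which sorts after every real presentation timestamp already waiting in the clock queue; `frame.id` acts as a tiebreaker so the queue never has to compare frame objects themselves. A self-contained sketch of that ordering (payloads are illustrative strings, not real frames):

```python
import asyncio

async def main() -> None:
    queue: asyncio.PriorityQueue = asyncio.PriorityQueue()
    # Entries mirror the (pts, id, frame) tuples used by the clock queue.
    await queue.put((float("inf"), 3, "EndFrame sentinel"))
    await queue.put((20_000_000, 2, "frame B at pts 20 ms"))
    await queue.put((10_000_000, 1, "frame A at pts 10 ms"))

    while not queue.empty():
        pts, _, payload = await queue.get()
        print(pts, payload)  # frame A, frame B, then the sentinel last

asyncio.run(main())
```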
```diff
@@ -303,9 +436,9 @@ class BaseOutputTransport(FrameProcessor):
             # also need to wait for these tasks before cancelling the video task
             # because it might be still rendering.
             if self._audio_task:
-                await self.
+                await self._audio_task
             if self._clock_task:
-                await self.
+                await self._clock_task
 
             # Stop audio mixer.
             if self._mixer:
@@ -315,12 +448,22 @@ class BaseOutputTransport(FrameProcessor):
             await self._cancel_video_task()
 
         async def cancel(self, frame: CancelFrame):
+            """Cancel the media sender and stop all processing.
+
+            Args:
+                frame: The cancel frame signaling immediate cancellation.
+            """
             # Since we are cancelling everything it doesn't matter what task we cancel first.
             await self._cancel_audio_task()
             await self._cancel_clock_task()
             await self._cancel_video_task()
 
         async def handle_interruptions(self, _: StartInterruptionFrame):
+            """Handle interruption events by restarting tasks and clearing buffers.
+
+            Args:
+                _: The start interruption frame (unused).
+            """
             if not self._transport.interruptions_allowed:
                 return
 
@@ -336,6 +479,11 @@ class BaseOutputTransport(FrameProcessor):
             await self._bot_stopped_speaking()
 
         async def handle_audio_frame(self, frame: OutputAudioRawFrame):
+            """Handle incoming audio frames by buffering and chunking.
+
+            Args:
+                frame: The output audio frame to handle.
+            """
             if not self._params.audio_out_enabled:
                 return
 
@@ -358,6 +506,11 @@ class BaseOutputTransport(FrameProcessor):
                 self._audio_buffer = self._audio_buffer[self._audio_chunk_size :]
 
         async def handle_image_frame(self, frame: OutputImageRawFrame | SpriteFrame):
+            """Handle incoming image frames for video output.
+
+            Args:
+                frame: The output image or sprite frame to handle.
+            """
             if not self._params.video_out_enabled:
                 return
 
@@ -369,12 +522,27 @@ class BaseOutputTransport(FrameProcessor):
                 await self._set_video_images(frame.images)
 
         async def handle_timed_frame(self, frame: Frame):
+            """Handle frames with presentation timestamps.
+
+            Args:
+                frame: The frame with timing information to handle.
+            """
             await self._clock_queue.put((frame.pts, frame.id, frame))
 
         async def handle_sync_frame(self, frame: Frame):
+            """Handle frames that need synchronized processing.
+
+            Args:
+                frame: The frame to handle synchronously.
+            """
             await self._audio_queue.put(frame)
 
         async def handle_mixer_control_frame(self, frame: MixerControlFrame):
+            """Handle audio mixer control frames.
+
+            Args:
+                frame: The mixer control frame to handle.
+            """
             if self._mixer:
                 await self._mixer.process_frame(frame)
 
@@ -383,16 +551,19 @@ class BaseOutputTransport(FrameProcessor):
         #
 
         def _create_audio_task(self):
+            """Create the audio processing task."""
             if not self._audio_task:
                 self._audio_queue = asyncio.Queue()
                 self._audio_task = self._transport.create_task(self._audio_task_handler())
 
         async def _cancel_audio_task(self):
+            """Cancel and cleanup the audio processing task."""
             if self._audio_task:
                 await self._transport.cancel_task(self._audio_task)
                 self._audio_task = None
 
         async def _bot_started_speaking(self):
+            """Handle bot started speaking event."""
             if not self._bot_speaking:
                 self._transport.logger.debug(
                     f"Bot{f' [{self._destination}]' if self._destination else ''} started speaking"
@@ -408,6 +579,7 @@ class BaseOutputTransport(FrameProcessor):
             self._bot_speaking = True
 
         async def _bot_stopped_speaking(self):
+            """Handle bot stopped speaking event."""
             if self._bot_speaking:
                 self._transport.logger.debug(
                     f"Bot{f' [{self._destination}]' if self._destination else ''} stopped speaking"
@@ -427,6 +599,11 @@ class BaseOutputTransport(FrameProcessor):
             self._audio_buffer = bytearray()
 
         async def _handle_frame(self, frame: Frame):
+            """Handle various frame types with appropriate processing.
+
+            Args:
+                frame: The frame to handle.
+            """
             if isinstance(frame, OutputImageRawFrame):
                 await self._set_video_image(frame)
             elif isinstance(frame, SpriteFrame):
@@ -437,16 +614,20 @@ class BaseOutputTransport(FrameProcessor):
                 await self._transport.write_dtmf(frame)
 
         def _next_frame(self) -> AsyncGenerator[Frame, None]:
+            """Generate the next frame for audio processing.
+
+            Returns:
+                An async generator yielding frames for processing.
+            """
+
             async def without_mixer(vad_stop_secs: float) -> AsyncGenerator[Frame, None]:
                 while True:
                     try:
                         frame = await asyncio.wait_for(
                             self._audio_queue.get(), timeout=vad_stop_secs
                         )
-                        self._transport.reset_watchdog()
                         yield frame
                     except asyncio.TimeoutError:
-                        self._transport.reset_watchdog()
                         # Notify the bot stopped speaking upstream if necessary.
                         await self._bot_stopped_speaking()
 
@@ -456,13 +637,11 @@ class BaseOutputTransport(FrameProcessor):
                 while True:
                     try:
                         frame = self._audio_queue.get_nowait()
-                        self._transport.reset_watchdog()
                         if isinstance(frame, OutputAudioRawFrame):
                             frame.audio = await self._mixer.mix(frame.audio)
                         last_frame_time = time.time()
                         yield frame
                     except asyncio.QueueEmpty:
-                        self._transport.reset_watchdog()
                         # Notify the bot stopped speaking upstream if necessary.
                         diff_time = time.time() - last_frame_time
                         if diff_time > vad_stop_secs:
@@ -474,6 +653,11 @@ class BaseOutputTransport(FrameProcessor):
                             num_channels=self._params.audio_out_channels,
                         )
                         yield frame
+                        # Allow other asyncio tasks to execute by adding a small sleep
+                        # Without this sleep, in task cancellation scenarios, this loop would
+                        # continuously return without any delay, leading to 100% CPU utilization
+                        # and preventing cancel/stop signals from being processed properly
+                        await asyncio.sleep(0)
 
             if self._mixer:
                 return with_mixer(BOT_VAD_STOP_SECS)
```
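The `await asyncio.sleep(0)` added to the mixer path above matters because `get_nowait()` never suspends: without at least one await per iteration the generator can spin at 100% CPU and pending cancellations are never delivered. A standalone sketch (hypothetical names) of the zero-second sleep giving the event loop room to act:

```python
import asyncio

async def drain_forever() -> None:
    # Stands in for the with_mixer() loop: pure get_nowait()-style polling
    # has no suspension point, so a cancel() would never be observed.
    # The zero-second sleep yields control once per iteration.
    while True:
        await asyncio.sleep(0)

async def main() -> None:
    task = asyncio.create_task(drain_forever())
    await asyncio.sleep(0.01)  # let the busy task run briefly
    task.cancel()
    try:
        await task
    except asyncio.CancelledError:
        print("cancelled promptly")  # only reachable because of sleep(0)

asyncio.run(main())
```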
```diff
@@ -481,16 +665,31 @@ class BaseOutputTransport(FrameProcessor):
             return without_mixer(BOT_VAD_STOP_SECS)
 
         async def _audio_task_handler(self):
+            """Main audio processing task handler."""
             # Push a BotSpeakingFrame every 200ms, we don't really need to push it
             # at every audio chunk. If the audio chunk is bigger than 200ms, push at
             # every audio chunk.
             TOTAL_CHUNK_MS = self._params.audio_out_10ms_chunks * 10
             BOT_SPEAKING_CHUNK_PERIOD = max(int(200 / TOTAL_CHUNK_MS), 1)
             bot_speaking_counter = 0
+            speech_last_speaking_time = 0
+
             async for frame in self._next_frame():
                 # Notify the bot started speaking upstream if necessary and that
                 # it's actually speaking.
+                is_speaking = False
                 if isinstance(frame, TTSAudioRawFrame):
+                    is_speaking = True
+                elif isinstance(frame, SpeechOutputAudioRawFrame):
+                    if not is_silence(frame.audio):
+                        is_speaking = True
+                        speech_last_speaking_time = time.time()
+                    else:
+                        silence_duration = time.time() - speech_last_speaking_time
+                        if silence_duration > BOT_VAD_STOP_SECS:
+                            await self._bot_stopped_speaking()
+
+                if is_speaking:
                     await self._bot_started_speaking()
                     if bot_speaking_counter % BOT_SPEAKING_CHUNK_PERIOD == 0:
                         await self._transport.push_frame(BotSpeakingFrame())
```
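Two details in the handler above are worth unpacking. First, the `BotSpeakingFrame` cadence: with, say, `audio_out_10ms_chunks = 4`, `TOTAL_CHUNK_MS` is 40, so `BOT_SPEAKING_CHUNK_PERIOD = max(int(200 / 40), 1) = 5` and the frame is pushed on every fifth chunk, roughly every 200 ms. Second, the new silence gating: `TTSAudioRawFrame` always counts as bot speech, while `SpeechOutputAudioRawFrame` counts only when `is_silence()` says the audio is non-silent, with `_bot_stopped_speaking()` fired once silence outlasts `BOT_VAD_STOP_SECS`. A minimal sketch of that gate with a stand-in silence check (the real one is `pipecat.audio.utils.is_silence`):

```python
import time

BOT_VAD_STOP_SECS = 0.30  # same constant as at the top of base_output.py

def is_silence(audio: bytes) -> bool:
    # Stand-in: treats all-zero PCM bytes as silence; the real helper
    # would look at signal energy.
    return not any(audio)

def classify_speech_audio(audio: bytes, state: dict) -> str:
    """Sketch of the new SpeechOutputAudioRawFrame branch: non-silent
    audio counts as speaking, silence longer than BOT_VAD_STOP_SECS means
    the bot stopped, and shorter silence is just a pause."""
    now = time.time()
    if not is_silence(audio):
        state["last_speaking"] = now
        return "speaking"
    if now - state["last_speaking"] > BOT_VAD_STOP_SECS:
        return "stopped"
    return "pause"

state = {"last_speaking": time.time()}
print(classify_speech_audio(b"\x10\x00\x22\x00", state))  # speaking
print(classify_speech_audio(b"\x00" * 4, state))          # pause
```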
```diff
@@ -519,23 +718,36 @@ class BaseOutputTransport(FrameProcessor):
         #
 
         def _create_video_task(self):
+            """Create the video processing task if video output is enabled."""
             if not self._video_task and self._params.video_out_enabled:
                 self._video_queue = asyncio.Queue()
                 self._video_task = self._transport.create_task(self._video_task_handler())
 
         async def _cancel_video_task(self):
+            """Cancel and cleanup the video processing task."""
             # Stop video output task.
             if self._video_task:
                 await self._transport.cancel_task(self._video_task)
                 self._video_task = None
 
         async def _set_video_image(self, image: OutputImageRawFrame):
+            """Set a single video image for cycling output.
+
+            Args:
+                image: The image frame to cycle for video output.
+            """
             self._video_images = itertools.cycle([image])
 
         async def _set_video_images(self, images: List[OutputImageRawFrame]):
+            """Set multiple video images for cycling output.
+
+            Args:
+                images: The list of image frames to cycle for video output.
+            """
             self._video_images = itertools.cycle(images)
 
         async def _video_task_handler(self):
+            """Main video processing task handler."""
             self._video_start_time = None
             self._video_frame_index = 0
             self._video_frame_duration = 1 / self._params.video_out_framerate
@@ -551,6 +763,7 @@ class BaseOutputTransport(FrameProcessor):
                 await asyncio.sleep(self._video_frame_duration)
 
         async def _video_is_live_handler(self):
+            """Handle live video streaming with frame timing."""
             image = await self._video_queue.get()
 
             # We get the start time as soon as we get the first image.
@@ -576,6 +789,12 @@ class BaseOutputTransport(FrameProcessor):
             self._video_queue.task_done()
 
         async def _draw_image(self, frame: OutputImageRawFrame):
+            """Draw/render an image frame with resizing if needed.
+
+            Args:
+                frame: The image frame to draw.
+            """
+
             def resize_frame(frame: OutputImageRawFrame) -> OutputImageRawFrame:
                 desired_size = (self._params.video_out_width, self._params.video_out_height)
 
```
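`_video_task_handler` above paces output at `1 / video_out_framerate` seconds per frame, and the live handler additionally anchors timing to the first image it receives. A minimal fixed-rate pacing sketch under those assumptions (the write step is elided):

```python
import asyncio
import time

async def pace_video_frames(frames, framerate: float) -> None:
    # Each iteration writes one frame, then sleeps for whatever is left
    # of the frame duration so output stays close to the target rate.
    frame_duration = 1 / framerate
    for frame in frames:
        started = time.monotonic()
        ...  # write `frame` to the transport here
        elapsed = time.monotonic() - started
        await asyncio.sleep(max(frame_duration - elapsed, 0))

asyncio.run(pace_video_frames(["frame-1", "frame-2", "frame-3"], framerate=30))
```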
```diff
@@ -602,16 +821,19 @@ class BaseOutputTransport(FrameProcessor):
         #
 
         def _create_clock_task(self):
+            """Create the clock/timing processing task."""
             if not self._clock_task:
-                self._clock_queue =
+                self._clock_queue = asyncio.PriorityQueue()
                 self._clock_task = self._transport.create_task(self._clock_task_handler())
 
         async def _cancel_clock_task(self):
+            """Cancel and cleanup the clock processing task."""
             if self._clock_task:
                 await self._transport.cancel_task(self._clock_task)
                 self._clock_task = None
 
         async def _clock_task_handler(self):
+            """Main clock/timing task handler for timed frame delivery."""
             running = True
             while running:
                 timestamp, _, frame = await self._clock_queue.get()
```