dv-pipecat-ai 0.0.82.dev815__py3-none-any.whl → 0.0.82.dev857__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dv-pipecat-ai might be problematic.

Files changed (106)
  1. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/METADATA +8 -3
  2. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/RECORD +106 -79
  3. pipecat/adapters/base_llm_adapter.py +44 -6
  4. pipecat/adapters/services/anthropic_adapter.py +302 -2
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
  6. pipecat/adapters/services/bedrock_adapter.py +40 -2
  7. pipecat/adapters/services/gemini_adapter.py +276 -6
  8. pipecat/adapters/services/open_ai_adapter.py +88 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
  10. pipecat/audio/dtmf/__init__.py +0 -0
  11. pipecat/audio/dtmf/types.py +47 -0
  12. pipecat/audio/dtmf/utils.py +70 -0
  13. pipecat/audio/filters/aic_filter.py +199 -0
  14. pipecat/audio/utils.py +9 -7
  15. pipecat/extensions/ivr/__init__.py +0 -0
  16. pipecat/extensions/ivr/ivr_navigator.py +452 -0
  17. pipecat/frames/frames.py +156 -43
  18. pipecat/pipeline/llm_switcher.py +76 -0
  19. pipecat/pipeline/parallel_pipeline.py +3 -3
  20. pipecat/pipeline/service_switcher.py +144 -0
  21. pipecat/pipeline/task.py +68 -28
  22. pipecat/pipeline/task_observer.py +10 -0
  23. pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
  24. pipecat/processors/aggregators/llm_context.py +277 -0
  25. pipecat/processors/aggregators/llm_response.py +48 -15
  26. pipecat/processors/aggregators/llm_response_universal.py +840 -0
  27. pipecat/processors/aggregators/openai_llm_context.py +3 -3
  28. pipecat/processors/dtmf_aggregator.py +0 -2
  29. pipecat/processors/filters/stt_mute_filter.py +0 -2
  30. pipecat/processors/frame_processor.py +18 -11
  31. pipecat/processors/frameworks/rtvi.py +17 -10
  32. pipecat/processors/metrics/sentry.py +2 -0
  33. pipecat/runner/daily.py +137 -36
  34. pipecat/runner/run.py +1 -1
  35. pipecat/runner/utils.py +7 -7
  36. pipecat/serializers/asterisk.py +20 -4
  37. pipecat/serializers/exotel.py +1 -1
  38. pipecat/serializers/plivo.py +1 -1
  39. pipecat/serializers/telnyx.py +1 -1
  40. pipecat/serializers/twilio.py +1 -1
  41. pipecat/services/__init__.py +2 -2
  42. pipecat/services/anthropic/llm.py +113 -28
  43. pipecat/services/asyncai/tts.py +4 -0
  44. pipecat/services/aws/llm.py +82 -8
  45. pipecat/services/aws/tts.py +0 -10
  46. pipecat/services/aws_nova_sonic/aws.py +5 -0
  47. pipecat/services/cartesia/tts.py +28 -16
  48. pipecat/services/cerebras/llm.py +15 -10
  49. pipecat/services/deepgram/stt.py +8 -0
  50. pipecat/services/deepseek/llm.py +13 -8
  51. pipecat/services/fireworks/llm.py +13 -8
  52. pipecat/services/fish/tts.py +8 -6
  53. pipecat/services/gemini_multimodal_live/gemini.py +5 -0
  54. pipecat/services/gladia/config.py +7 -1
  55. pipecat/services/gladia/stt.py +23 -15
  56. pipecat/services/google/llm.py +159 -59
  57. pipecat/services/google/llm_openai.py +18 -3
  58. pipecat/services/grok/llm.py +2 -1
  59. pipecat/services/llm_service.py +38 -3
  60. pipecat/services/mem0/memory.py +2 -1
  61. pipecat/services/mistral/llm.py +5 -6
  62. pipecat/services/nim/llm.py +2 -1
  63. pipecat/services/openai/base_llm.py +88 -26
  64. pipecat/services/openai/image.py +6 -1
  65. pipecat/services/openai_realtime_beta/openai.py +5 -2
  66. pipecat/services/openpipe/llm.py +6 -8
  67. pipecat/services/perplexity/llm.py +13 -8
  68. pipecat/services/playht/tts.py +9 -6
  69. pipecat/services/rime/tts.py +1 -1
  70. pipecat/services/sambanova/llm.py +18 -13
  71. pipecat/services/sarvam/tts.py +415 -10
  72. pipecat/services/speechmatics/stt.py +2 -2
  73. pipecat/services/tavus/video.py +1 -1
  74. pipecat/services/tts_service.py +15 -5
  75. pipecat/services/vistaar/llm.py +2 -5
  76. pipecat/transports/base_input.py +32 -19
  77. pipecat/transports/base_output.py +39 -5
  78. pipecat/transports/daily/__init__.py +0 -0
  79. pipecat/transports/daily/transport.py +2371 -0
  80. pipecat/transports/daily/utils.py +410 -0
  81. pipecat/transports/livekit/__init__.py +0 -0
  82. pipecat/transports/livekit/transport.py +1042 -0
  83. pipecat/transports/network/fastapi_websocket.py +12 -546
  84. pipecat/transports/network/small_webrtc.py +12 -922
  85. pipecat/transports/network/webrtc_connection.py +9 -595
  86. pipecat/transports/network/websocket_client.py +12 -481
  87. pipecat/transports/network/websocket_server.py +12 -487
  88. pipecat/transports/services/daily.py +9 -2334
  89. pipecat/transports/services/helpers/daily_rest.py +12 -396
  90. pipecat/transports/services/livekit.py +12 -975
  91. pipecat/transports/services/tavus.py +12 -757
  92. pipecat/transports/smallwebrtc/__init__.py +0 -0
  93. pipecat/transports/smallwebrtc/connection.py +612 -0
  94. pipecat/transports/smallwebrtc/transport.py +936 -0
  95. pipecat/transports/tavus/__init__.py +0 -0
  96. pipecat/transports/tavus/transport.py +770 -0
  97. pipecat/transports/websocket/__init__.py +0 -0
  98. pipecat/transports/websocket/client.py +494 -0
  99. pipecat/transports/websocket/fastapi.py +559 -0
  100. pipecat/transports/websocket/server.py +500 -0
  101. pipecat/transports/whatsapp/__init__.py +0 -0
  102. pipecat/transports/whatsapp/api.py +345 -0
  103. pipecat/transports/whatsapp/client.py +364 -0
  104. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/WHEEL +0 -0
  105. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/licenses/LICENSE +0 -0
  106. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/top_level.txt +0 -0
pipecat/transports/daily/transport.py (new file)
@@ -0,0 +1,2371 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """Daily transport implementation for Pipecat.
8
+
9
+ This module provides comprehensive Daily video conferencing integration including
10
+ audio/video streaming, transcription, recording, dial-in/out functionality, and
11
+ real-time communication features.
12
+ """
13
+
14
+ import asyncio
15
+ import time
16
+ from concurrent.futures import CancelledError as FuturesCancelledError
17
+ from concurrent.futures import ThreadPoolExecutor
18
+ from dataclasses import dataclass
19
+ from typing import Any, Awaitable, Callable, Dict, Mapping, Optional
20
+
21
+ import aiohttp
22
+ from loguru import logger
23
+ from pydantic import BaseModel
24
+
25
+ from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADParams
26
+ from pipecat.frames.frames import (
27
+ CancelFrame,
28
+ EndFrame,
29
+ ErrorFrame,
30
+ Frame,
31
+ InputAudioRawFrame,
32
+ InputTransportMessageUrgentFrame,
33
+ InterimTranscriptionFrame,
34
+ OutputAudioRawFrame,
35
+ OutputImageRawFrame,
36
+ SpriteFrame,
37
+ StartFrame,
38
+ TranscriptionFrame,
39
+ TransportMessageFrame,
40
+ TransportMessageUrgentFrame,
41
+ UserAudioRawFrame,
42
+ UserImageRawFrame,
43
+ UserImageRequestFrame,
44
+ )
45
+ from pipecat.processors.frame_processor import FrameDirection, FrameProcessorSetup
46
+ from pipecat.transcriptions.language import Language
47
+ from pipecat.transports.base_input import BaseInputTransport
48
+ from pipecat.transports.base_output import BaseOutputTransport
49
+ from pipecat.transports.base_transport import BaseTransport, TransportParams
50
+ from pipecat.utils.asyncio.task_manager import BaseTaskManager
51
+
52
+ try:
53
+ from daily import (
54
+ AudioData,
55
+ CallClient,
56
+ CustomAudioSource,
57
+ CustomAudioTrack,
58
+ Daily,
59
+ EventHandler,
60
+ VideoFrame,
61
+ VirtualCameraDevice,
62
+ VirtualSpeakerDevice,
63
+ )
64
+ from daily import (
65
+ LogLevel as DailyLogLevel,
66
+ )
67
+ except ModuleNotFoundError as e:
68
+ logger.error(f"Exception: {e}")
69
+ logger.error(
70
+ "In order to use the Daily transport, you need to `pip install pipecat-ai[daily]`."
71
+ )
72
+ raise Exception(f"Missing module: {e}")
73
+
74
+ VAD_RESET_PERIOD_MS = 2000
75
+
76
+
77
+ @dataclass
78
+ class DailyTransportMessageFrame(TransportMessageFrame):
79
+ """Frame for transport messages in Daily calls.
80
+
81
+ Parameters:
82
+ participant_id: Optional ID of the participant this message is for/from.
83
+ """
84
+
85
+ participant_id: Optional[str] = None
86
+
87
+
88
+ @dataclass
89
+ class DailyTransportMessageUrgentFrame(TransportMessageUrgentFrame):
90
+ """Frame for urgent transport messages in Daily calls.
91
+
92
+ Parameters:
93
+ participant_id: Optional ID of the participant this message is for/from.
94
+ """
95
+
96
+ participant_id: Optional[str] = None
97
+
98
+
99
+ @dataclass
100
+ class DailyInputTransportMessageUrgentFrame(InputTransportMessageUrgentFrame):
101
+ """Frame for input urgent transport messages in Daily calls.
102
+
103
+ Parameters:
104
+ participant_id: Optional ID of the participant this message is for/from.
105
+ """
106
+
107
+ participant_id: Optional[str] = None
108
+
109
+
110
+ class WebRTCVADAnalyzer(VADAnalyzer):
111
+ """Voice Activity Detection analyzer using WebRTC.
112
+
113
+ Implements voice activity detection using Daily's native WebRTC VAD.
114
+ """
115
+
116
+ def __init__(self, *, sample_rate: Optional[int] = None, params: Optional[VADParams] = None):
117
+ """Initialize the WebRTC VAD analyzer.
118
+
119
+ Args:
120
+ sample_rate: Audio sample rate in Hz.
121
+ params: VAD configuration parameters.
122
+ """
123
+ super().__init__(sample_rate=sample_rate, params=params)
124
+
125
+ self._webrtc_vad = Daily.create_native_vad(
126
+ reset_period_ms=VAD_RESET_PERIOD_MS, sample_rate=self.sample_rate, channels=1
127
+ )
128
+ logger.debug("Loaded native WebRTC VAD")
129
+
130
+ def num_frames_required(self) -> int:
131
+ """Get the number of audio frames required for VAD analysis.
132
+
133
+ Returns:
134
+ The number of frames needed (equivalent to 10ms of audio).
135
+ """
136
+ return int(self.sample_rate / 100.0)
137
+
138
+ def voice_confidence(self, buffer) -> float:
139
+ """Analyze audio buffer and return voice confidence score.
140
+
141
+ Args:
142
+ buffer: Audio buffer to analyze.
143
+
144
+ Returns:
145
+ Voice confidence score between 0.0 and 1.0.
146
+ """
147
+ confidence = 0
148
+ if len(buffer) > 0:
149
+ confidence = self._webrtc_vad.analyze_frames(buffer)
150
+ return confidence
151
+
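# Illustrative sketch (not part of the package diff): using the analyzer on its own.
# It assumes daily-python is installed and Daily.init() has already been called
# (DailyTransportClient does this on first construction).
from pipecat.audio.vad.vad_analyzer import VADParams

vad = WebRTCVADAnalyzer(sample_rate=16000, params=VADParams())
frames_needed = vad.num_frames_required()        # 16000 / 100 = 160 samples (10ms)
silence = b"\x00\x00" * frames_needed            # 16-bit PCM silence
confidence = vad.voice_confidence(silence)       # expected to be close to 0.0
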
152
+
153
+ class DailyDialinSettings(BaseModel):
154
+ """Settings for Daily's dial-in functionality.
155
+
156
+ Parameters:
157
+ call_id: UUID of the call session (the sessionId on the SIP network).
158
+ call_domain: UUID of your Daily domain on the SIP network.
159
+ """
160
+
161
+ call_id: str = ""
162
+ call_domain: str = ""
163
+
164
+
165
+ class DailyTranscriptionSettings(BaseModel):
166
+ """Configuration settings for Daily's transcription service.
167
+
168
+ Parameters:
169
+ language: ISO language code for transcription (e.g. "en").
170
+ model: Transcription model to use (e.g. "nova-2-general").
171
+ profanity_filter: Whether to filter profanity from transcripts.
172
+ redact: Whether to redact sensitive information.
173
+ endpointing: Whether to use endpointing to determine speech segments.
174
+ punctuate: Whether to add punctuation to transcripts.
175
+ includeRawResponse: Whether to include raw response data.
176
+ extra: Additional parameters passed to the Deepgram transcription service.
177
+ """
178
+
179
+ language: str = "en"
180
+ model: str = "nova-2-general"
181
+ profanity_filter: bool = True
182
+ redact: bool = False
183
+ endpointing: bool = True
184
+ punctuate: bool = True
185
+ includeRawResponse: bool = True
186
+ extra: Mapping[str, Any] = {"interim_results": True}
187
+
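# Illustrative sketch (not part of the package diff): overriding the transcription
# defaults. Field names mirror the model above; the values are example choices.
transcription = DailyTranscriptionSettings(
    language="es",
    model="nova-2-general",
    punctuate=True,
    extra={"interim_results": True},
)
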
188
+
189
+ class DailyParams(TransportParams):
190
+ """Configuration parameters for Daily transport.
191
+
192
+ Parameters:
193
+ api_url: Daily API base URL.
194
+ api_key: Daily API authentication key.
195
+ audio_in_user_tracks: Whether to receive each user's audio as a separate track.
196
+ dialin_settings: Optional settings for dial-in functionality.
197
+ camera_out_enabled: Whether to enable the main camera output track.
198
+ microphone_out_enabled: Whether to enable the main microphone track.
199
+ transcription_enabled: Whether to enable speech transcription.
200
+ transcription_settings: Configuration for transcription service.
201
+ """
202
+
203
+ api_url: str = "https://api.daily.co/v1"
204
+ api_key: str = ""
205
+ audio_in_user_tracks: bool = True
206
+ dialin_settings: Optional[DailyDialinSettings] = None
207
+ camera_out_enabled: bool = True
208
+ microphone_out_enabled: bool = True
209
+ transcription_enabled: bool = False
210
+ transcription_settings: DailyTranscriptionSettings = DailyTranscriptionSettings()
211
+
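# Illustrative sketch (not part of the package diff): a DailyParams configuration
# enabling audio in/out plus Daily-managed transcription. audio_in_enabled and
# audio_out_enabled come from the TransportParams base class; the API key is a
# placeholder.
params = DailyParams(
    api_key="YOUR_DAILY_API_KEY",
    audio_in_enabled=True,
    audio_out_enabled=True,
    transcription_enabled=True,
    transcription_settings=DailyTranscriptionSettings(language="en"),
)
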
212
+
213
+ class DailyCallbacks(BaseModel):
214
+ """Callback handlers for Daily events.
215
+
216
+ Parameters:
217
+ on_active_speaker_changed: Called when the active speaker of the call has changed.
218
+ on_joined: Called when bot successfully joined a room.
219
+ on_left: Called when bot left a room.
220
+ on_error: Called when an error occurs.
221
+ on_app_message: Called when receiving an app message.
222
+ on_call_state_updated: Called when call state changes.
223
+ on_client_connected: Called when a client (participant) connects.
224
+ on_client_disconnected: Called when a client (participant) disconnects.
225
+ on_dialin_connected: Called when dial-in is connected.
226
+ on_dialin_ready: Called when dial-in is ready.
227
+ on_dialin_stopped: Called when dial-in is stopped.
228
+ on_dialin_error: Called when dial-in encounters an error.
229
+ on_dialin_warning: Called when dial-in has a warning.
230
+ on_dialout_answered: Called when dial-out is answered.
231
+ on_dialout_connected: Called when dial-out is connected.
232
+ on_dialout_stopped: Called when dial-out is stopped.
233
+ on_dialout_error: Called when dial-out encounters an error.
234
+ on_dialout_warning: Called when dial-out has a warning.
235
+ on_participant_joined: Called when a participant joins.
236
+ on_participant_left: Called when a participant leaves.
237
+ on_participant_updated: Called when participant info is updated.
238
+ on_transcription_message: Called when receiving transcription.
239
+ on_transcription_stopped: Called when transcription is stopped.
240
+ on_transcription_error: Called when transcription encounters an error.
241
+ on_recording_started: Called when recording starts.
242
+ on_recording_stopped: Called when recording stops.
243
+ on_recording_error: Called when recording encounters an error.
244
+ """
245
+
246
+ on_active_speaker_changed: Callable[[Mapping[str, Any]], Awaitable[None]]
247
+ on_joined: Callable[[Mapping[str, Any]], Awaitable[None]]
248
+ on_left: Callable[[], Awaitable[None]]
249
+ on_error: Callable[[str], Awaitable[None]]
250
+ on_app_message: Callable[[Any, str], Awaitable[None]]
251
+ on_call_state_updated: Callable[[str], Awaitable[None]]
252
+ on_client_connected: Callable[[Mapping[str, Any]], Awaitable[None]]
253
+ on_client_disconnected: Callable[[Mapping[str, Any]], Awaitable[None]]
254
+ on_dialin_connected: Callable[[Any], Awaitable[None]]
255
+ on_dialin_ready: Callable[[str], Awaitable[None]]
256
+ on_dialin_stopped: Callable[[Any], Awaitable[None]]
257
+ on_dialin_error: Callable[[Any], Awaitable[None]]
258
+ on_dialin_warning: Callable[[Any], Awaitable[None]]
259
+ on_dialout_answered: Callable[[Any], Awaitable[None]]
260
+ on_dialout_connected: Callable[[Any], Awaitable[None]]
261
+ on_dialout_stopped: Callable[[Any], Awaitable[None]]
262
+ on_dialout_error: Callable[[Any], Awaitable[None]]
263
+ on_dialout_warning: Callable[[Any], Awaitable[None]]
264
+ on_participant_joined: Callable[[Mapping[str, Any]], Awaitable[None]]
265
+ on_participant_left: Callable[[Mapping[str, Any], str], Awaitable[None]]
266
+ on_participant_updated: Callable[[Mapping[str, Any]], Awaitable[None]]
267
+ on_transcription_message: Callable[[Mapping[str, Any]], Awaitable[None]]
268
+ on_transcription_stopped: Callable[[str, bool], Awaitable[None]]
269
+ on_transcription_error: Callable[[str], Awaitable[None]]
270
+ on_recording_started: Callable[[Mapping[str, Any]], Awaitable[None]]
271
+ on_recording_stopped: Callable[[str], Awaitable[None]]
272
+ on_recording_error: Callable[[str, str], Awaitable[None]]
273
+
274
+
275
+ def completion_callback(future):
276
+ """Create a completion callback for Daily API calls.
277
+
278
+ Args:
279
+ future: The asyncio Future to set the result on.
280
+
281
+ Returns:
282
+ A callback function that sets the future result.
283
+ """
284
+
285
+ def _callback(*args):
286
+ def set_result(future, *args):
287
+ try:
288
+ if len(args) > 1:
289
+ future.set_result(args)
290
+ else:
291
+ future.set_result(*args)
292
+ except asyncio.InvalidStateError:
293
+ pass
294
+
295
+ future.get_loop().call_soon_threadsafe(set_result, future, *args)
296
+
297
+ return _callback
298
+
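# Illustrative sketch (not part of the package diff): the awaitable-completion
# pattern used throughout this client. daily-python invokes the completion from its
# own thread, and call_soon_threadsafe hands the result back to the asyncio loop.
async def send_app_message_awaitable(client: CallClient, message, loop: asyncio.AbstractEventLoop):
    # Mirrors DailyTransportClient.send_message(): wrap the thread-based completion
    # in a future so the call can be awaited from asyncio code.
    future = loop.create_future()
    client.send_app_message(message, None, completion=completion_callback(future))
    return await future
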
299
+
300
+ @dataclass
301
+ class DailyAudioTrack:
302
+ """Container for Daily audio track components.
303
+
304
+ Parameters:
305
+ source: The custom audio source for the track.
306
+ track: The custom audio track instance.
307
+ """
308
+
309
+ source: CustomAudioSource
310
+ track: CustomAudioTrack
311
+
312
+
313
+ class DailyTransportClient(EventHandler):
314
+ """Core client for interacting with Daily's API.
315
+
316
+ Manages the connection to Daily rooms and handles all low-level API interactions
317
+ including room management, media streaming, transcription, and event handling.
318
+ """
319
+
320
+ _daily_initialized: bool = False
321
+
322
+ def __new__(cls, *args, **kwargs):
323
+ """Override EventHandler's __new__ method to ensure Daily is initialized only once."""
324
+ return super().__new__(cls)
325
+
326
+ def __init__(
327
+ self,
328
+ room_url: str,
329
+ token: Optional[str],
330
+ bot_name: str,
331
+ params: DailyParams,
332
+ callbacks: DailyCallbacks,
333
+ transport_name: str,
334
+ ):
335
+ """Initialize the Daily transport client.
336
+
337
+ Args:
338
+ room_url: URL of the Daily room to connect to.
339
+ token: Optional authentication token for the room.
340
+ bot_name: Display name for the bot in the call.
341
+ params: Configuration parameters for the transport.
342
+ callbacks: Event callback handlers.
343
+ transport_name: Name identifier for the transport.
344
+ """
345
+ super().__init__()
346
+
347
+ if not DailyTransportClient._daily_initialized:
348
+ DailyTransportClient._daily_initialized = True
349
+ Daily.init()
350
+
351
+ self._room_url: str = room_url
352
+ self._token: Optional[str] = token
353
+ self._bot_name: str = bot_name
354
+ self._params: DailyParams = params
355
+ self._callbacks = callbacks
356
+ self._transport_name = transport_name
357
+
358
+ self._participant_id: str = ""
359
+ self._audio_renderers = {}
360
+ self._video_renderers = {}
361
+ self._transcription_ids = []
362
+ self._transcription_status = None
363
+ self._dial_out_session_id: str = ""
364
+
365
+ self._joining = False
366
+ self._joined = False
367
+ self._joined_event = asyncio.Event()
368
+ self._leave_counter = 0
369
+
370
+ self._task_manager: Optional[BaseTaskManager] = None
371
+
372
+ # We use the executor to cleanup the client. We just do it from one
373
+ # place, so only one thread is really needed.
374
+ self._executor = ThreadPoolExecutor(max_workers=1)
375
+
376
+ self._client: CallClient = CallClient(event_handler=self)
377
+
378
+ # We use separate tasks to execute callbacks (events, audio or
379
+ # video). In the case of events, if we call a `CallClient` function
380
+ # inside the callback and wait for its completion this will result in a
381
+ # deadlock (because we haven't exited the event callback). The deadlocks
382
+ # occur because `daily-python` is holding the GIL when calling the
383
+ # callbacks. So, if our callback handler makes a `CallClient` call and
384
+ # waits for it to finish using completions (and a future) we will
385
+ # deadlock because completions use event handlers (which are holding the
386
+ # GIL).
387
+ self._event_task = None
388
+ self._audio_task = None
389
+ self._video_task = None
390
+
391
+ # Input and output sample rates. They are initialized in start().
392
+ self._in_sample_rate = 0
393
+ self._out_sample_rate = 0
394
+
395
+ self._camera: Optional[VirtualCameraDevice] = None
396
+ self._speaker: Optional[VirtualSpeakerDevice] = None
397
+ self._microphone_track: Optional[DailyAudioTrack] = None
398
+ self._custom_audio_tracks: Dict[str, DailyAudioTrack] = {}
399
+
400
+ def _camera_name(self):
401
+ """Generate a unique virtual camera name for this client instance."""
402
+ return f"camera-{self}"
403
+
404
+ def _speaker_name(self):
405
+ """Generate a unique virtual speaker name for this client instance."""
406
+ return f"speaker-{self}"
407
+
408
+ @property
409
+ def room_url(self) -> str:
410
+ """Get the Daily room URL.
411
+
412
+ Returns:
413
+ The room URL this client is connected to.
414
+ """
415
+ return self._room_url
416
+
417
+ @property
418
+ def participant_id(self) -> str:
419
+ """Get the participant ID for this client.
420
+
421
+ Returns:
422
+ The participant ID assigned by Daily.
423
+ """
424
+ return self._participant_id
425
+
426
+ @property
427
+ def in_sample_rate(self) -> int:
428
+ """Get the input audio sample rate.
429
+
430
+ Returns:
431
+ The input sample rate in Hz.
432
+ """
433
+ return self._in_sample_rate
434
+
435
+ @property
436
+ def out_sample_rate(self) -> int:
437
+ """Get the output audio sample rate.
438
+
439
+ Returns:
440
+ The output sample rate in Hz.
441
+ """
442
+ return self._out_sample_rate
443
+
444
+ async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
445
+ """Send an application message to participants.
446
+
447
+ Args:
448
+ frame: The message frame to send.
449
+ """
450
+ if not self._joined:
451
+ return
452
+
453
+ participant_id = None
454
+ if isinstance(frame, (DailyTransportMessageFrame, DailyTransportMessageUrgentFrame)):
455
+ participant_id = frame.participant_id
456
+
457
+ future = self._get_event_loop().create_future()
458
+ self._client.send_app_message(
459
+ frame.message, participant_id, completion=completion_callback(future)
460
+ )
461
+ await future
462
+
463
+ async def read_next_audio_frame(self) -> Optional[InputAudioRawFrame]:
464
+ """Reads the next 20ms audio frame from the virtual speaker."""
465
+ if not self._speaker:
466
+ return None
467
+
468
+ sample_rate = self._in_sample_rate
469
+ num_channels = self._params.audio_in_channels
470
+ num_frames = int(sample_rate / 100) * 2 # 20ms of audio
471
+
472
+ future = self._get_event_loop().create_future()
473
+ self._speaker.read_frames(num_frames, completion=completion_callback(future))
474
+ audio = await future
475
+
476
+ if len(audio) > 0:
477
+ return InputAudioRawFrame(
478
+ audio=audio, sample_rate=sample_rate, num_channels=num_channels
479
+ )
480
+ else:
481
+ # If we don't read any audio it could be there's no participant
482
+ # connected. daily-python will return immediately if that's the
483
+ # case, so sleep for a little bit to avoid a busy loop.
484
+ await asyncio.sleep(0.01)
485
+ return None
486
+
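# Illustrative sketch (not part of the package diff): draining 20ms frames from the
# virtual-speaker path (audio_in_user_tracks=False). `client` is assumed to be a
# started and joined DailyTransportClient.
import time

async def pump_audio(client: "DailyTransportClient", seconds: float = 1.0) -> bytes:
    chunks = []
    deadline = time.time() + seconds
    while time.time() < deadline:
        frame = await client.read_next_audio_frame()
        if frame:                      # None means no participant audio was available
            chunks.append(frame.audio)
    return b"".join(chunks)
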
487
+ async def register_audio_destination(self, destination: str):
488
+ """Register a custom audio destination for multi-track output.
489
+
490
+ Args:
491
+ destination: The destination identifier to register.
492
+ """
493
+ self._custom_audio_tracks[destination] = await self.add_custom_audio_track(destination)
494
+ self._client.update_publishing({"customAudio": {destination: True}})
495
+
496
+ async def write_audio_frame(self, frame: OutputAudioRawFrame):
497
+ """Write an audio frame to the appropriate audio track.
498
+
499
+ Args:
500
+ frame: The audio frame to write.
501
+ """
502
+ future = self._get_event_loop().create_future()
503
+
504
+ destination = frame.transport_destination
505
+ audio_source: Optional[CustomAudioSource] = None
506
+ if not destination and self._microphone_track:
507
+ audio_source = self._microphone_track.source
508
+ elif destination and destination in self._custom_audio_tracks:
509
+ track = self._custom_audio_tracks[destination]
510
+ audio_source = track.source
511
+
512
+ if audio_source:
513
+ audio_source.write_frames(frame.audio, completion=completion_callback(future))
514
+ else:
515
+ logger.warning(f"{self} unable to write audio frames to destination [{destination}]")
516
+ future.set_result(None)
517
+
518
+ await future
519
+
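# Illustrative sketch (not part of the package diff): routing audio to a named custom
# track. The destination name is arbitrary and hypothetical; it must be registered
# first so a CustomAudioTrack exists for it. Setting transport_destination on the
# frame is how write_audio_frame() above picks the track (treat the attribute
# assignment as an assumption to verify against your pipecat version).
async def play_to_custom_track(client: "DailyTransportClient", pcm: bytes):
    await client.register_audio_destination("announcements")
    frame = OutputAudioRawFrame(
        audio=pcm, sample_rate=client.out_sample_rate, num_channels=1
    )
    frame.transport_destination = "announcements"
    await client.write_audio_frame(frame)
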
520
+ async def write_video_frame(self, frame: OutputImageRawFrame):
521
+ """Write a video frame to the camera device.
522
+
523
+ Args:
524
+ frame: The image frame to write.
525
+ """
526
+ if not frame.transport_destination and self._camera:
527
+ self._camera.write_frame(frame.image)
528
+
529
+ async def setup(self, setup: FrameProcessorSetup):
530
+ """Setup the client with task manager and event queues.
531
+
532
+ Args:
533
+ setup: The frame processor setup configuration.
534
+ """
535
+ if self._task_manager:
536
+ return
537
+
538
+ self._task_manager = setup.task_manager
539
+
540
+ self._event_queue = asyncio.Queue()
541
+ self._event_task = self._task_manager.create_task(
542
+ self._callback_task_handler(self._event_queue),
543
+ f"{self}::event_callback_task",
544
+ )
545
+
546
+ async def cleanup(self):
547
+ """Cleanup client resources and cancel tasks."""
548
+ if self._event_task and self._task_manager:
549
+ await self._task_manager.cancel_task(self._event_task)
550
+ self._event_task = None
551
+ if self._audio_task and self._task_manager:
552
+ await self._task_manager.cancel_task(self._audio_task)
553
+ self._audio_task = None
554
+ if self._video_task and self._task_manager:
555
+ await self._task_manager.cancel_task(self._video_task)
556
+ self._video_task = None
557
+ # Make sure we don't block the event loop in case `client.release()`
558
+ # takes extra time.
559
+ await self._get_event_loop().run_in_executor(self._executor, self._cleanup)
560
+
561
+ async def start(self, frame: StartFrame):
562
+ """Start the client and initialize audio/video components.
563
+
564
+ Args:
565
+ frame: The start frame containing initialization parameters.
566
+ """
567
+ self._in_sample_rate = self._params.audio_in_sample_rate or frame.audio_in_sample_rate
568
+ self._out_sample_rate = self._params.audio_out_sample_rate or frame.audio_out_sample_rate
569
+
570
+ if self._params.audio_in_enabled:
571
+ if self._params.audio_in_user_tracks and not self._audio_task and self._task_manager:
572
+ self._audio_queue = asyncio.Queue()
573
+ self._audio_task = self._task_manager.create_task(
574
+ self._callback_task_handler(self._audio_queue),
575
+ f"{self}::audio_callback_task",
576
+ )
577
+ elif not self._speaker:
578
+ self._speaker = Daily.create_speaker_device(
579
+ self._speaker_name(),
580
+ sample_rate=self._in_sample_rate,
581
+ channels=self._params.audio_in_channels,
582
+ non_blocking=True,
583
+ )
584
+ Daily.select_speaker_device(self._speaker_name())
585
+
586
+ if self._params.video_in_enabled and not self._video_task and self._task_manager:
587
+ self._video_queue = asyncio.Queue()
588
+ self._video_task = self._task_manager.create_task(
589
+ self._callback_task_handler(self._video_queue),
590
+ f"{self}::video_callback_task",
591
+ )
592
+ if self._params.video_out_enabled and not self._camera:
593
+ self._camera = Daily.create_camera_device(
594
+ self._camera_name(),
595
+ width=self._params.video_out_width,
596
+ height=self._params.video_out_height,
597
+ color_format=self._params.video_out_color_format,
598
+ )
599
+
600
+ if self._params.audio_out_enabled and not self._microphone_track:
601
+ audio_source = CustomAudioSource(self._out_sample_rate, self._params.audio_out_channels)
602
+ audio_track = CustomAudioTrack(audio_source)
603
+ self._microphone_track = DailyAudioTrack(source=audio_source, track=audio_track)
604
+
605
+ async def join(self):
606
+ """Join the Daily room with configured settings."""
607
+ # Transport already joined or joining, ignore.
608
+ if self._joined or self._joining:
609
+ # Increment leave counter if we already joined.
610
+ self._leave_counter += 1
611
+ return
612
+
613
+ logger.info(f"Joining {self._room_url}")
614
+ self._joining = True
615
+
616
+ # For performance reasons, never subscribe to video streams (unless a
617
+ # video renderer is registered).
618
+ self._client.update_subscription_profiles(
619
+ {"base": {"camera": "unsubscribed", "screenVideo": "unsubscribed"}}
620
+ )
621
+
622
+ self._client.set_user_name(self._bot_name)
623
+
624
+ try:
625
+ (data, error) = await self._join()
626
+
627
+ if not error:
628
+ self._joined = True
629
+ self._joining = False
630
+ # Increment leave counter if we successfully joined.
631
+ self._leave_counter += 1
632
+
633
+ logger.info(f"Joined {self._room_url}")
634
+
635
+ if self._params.transcription_enabled:
636
+ await self.start_transcription(self._params.transcription_settings)
637
+
638
+ await self._callbacks.on_joined(data)
639
+
640
+ self._joined_event.set()
641
+ else:
642
+ error_msg = f"Error joining {self._room_url}: {error}"
643
+ logger.error(error_msg)
644
+ await self._callbacks.on_error(error_msg)
645
+ except asyncio.TimeoutError:
646
+ error_msg = f"Time out joining {self._room_url}"
647
+ logger.error(error_msg)
648
+ self._joining = False
649
+ await self._callbacks.on_error(error_msg)
650
+
651
+ async def _join(self):
652
+ """Execute the actual room join operation."""
653
+ if not self._client:
654
+ return
655
+
656
+ future = self._get_event_loop().create_future()
657
+
658
+ camera_enabled = self._params.video_out_enabled and self._params.camera_out_enabled
659
+ microphone_enabled = self._params.audio_out_enabled and self._params.microphone_out_enabled
660
+
661
+ self._client.join(
662
+ self._room_url,
663
+ self._token,
664
+ completion=completion_callback(future),
665
+ client_settings={
666
+ "inputs": {
667
+ "camera": {
668
+ "isEnabled": camera_enabled,
669
+ "settings": {
670
+ "deviceId": self._camera_name(),
671
+ },
672
+ },
673
+ "microphone": {
674
+ "isEnabled": microphone_enabled,
675
+ "settings": {
676
+ "customTrack": {
677
+ "id": self._microphone_track.track.id
678
+ if self._microphone_track
679
+ else "no-microphone-track"
680
+ }
681
+ },
682
+ },
683
+ },
684
+ "publishing": {
685
+ "camera": {
686
+ "sendSettings": {
687
+ "maxQuality": "low",
688
+ "encodings": {
689
+ "low": {
690
+ "maxBitrate": self._params.video_out_bitrate,
691
+ "maxFramerate": self._params.video_out_framerate,
692
+ }
693
+ },
694
+ }
695
+ },
696
+ "microphone": {
697
+ "sendSettings": {
698
+ "channelConfig": "stereo"
699
+ if self._params.audio_out_channels == 2
700
+ else "mono",
701
+ "bitrate": self._params.audio_out_bitrate,
702
+ }
703
+ },
704
+ },
705
+ },
706
+ )
707
+
708
+ return await asyncio.wait_for(future, timeout=10)
709
+
710
+ async def leave(self):
711
+ """Leave the Daily room and cleanup resources."""
712
+ # Decrement leave counter when leaving.
713
+ self._leave_counter -= 1
714
+
715
+ # Transport not joined, ignore.
716
+ if not self._joined or self._leave_counter > 0:
717
+ return
718
+
719
+ self._joined = False
720
+ self._joined_event.clear()
721
+
722
+ logger.info(f"Leaving {self._room_url}")
723
+
724
+ if self._params.transcription_enabled:
725
+ await self.stop_transcription()
726
+
727
+ # Remove any custom audio tracks.
728
+ for track_name, _ in self._custom_audio_tracks.items():
729
+ await self.remove_custom_audio_track(track_name)
730
+
731
+ try:
732
+ error = await self._leave()
733
+ if not error:
734
+ logger.info(f"Left {self._room_url}")
735
+ await self._callbacks.on_left()
736
+ else:
737
+ error_msg = f"Error leaving {self._room_url}: {error}"
738
+ logger.error(error_msg)
739
+ await self._callbacks.on_error(error_msg)
740
+ except asyncio.TimeoutError:
741
+ error_msg = f"Time out leaving {self._room_url}"
742
+ logger.error(error_msg)
743
+ await self._callbacks.on_error(error_msg)
744
+
745
+ async def _leave(self):
746
+ """Execute the actual room leave operation."""
747
+ if not self._client:
748
+ return
749
+
750
+ future = self._get_event_loop().create_future()
751
+ self._client.leave(completion=completion_callback(future))
752
+ return await asyncio.wait_for(future, timeout=10)
753
+
754
+ def _cleanup(self):
755
+ """Cleanup the Daily client instance."""
756
+ if self._client:
757
+ self._client.release()
758
+ self._client = None
759
+
760
+ def participants(self):
761
+ """Get current participants in the room.
762
+
763
+ Returns:
764
+ Dictionary of participants keyed by participant ID.
765
+ """
766
+ return self._client.participants()
767
+
768
+ def participant_counts(self):
769
+ """Get participant count information.
770
+
771
+ Returns:
772
+ Dictionary with participant count details.
773
+ """
774
+ return self._client.participant_counts()
775
+
776
+ async def start_dialout(self, settings):
777
+ """Start a dial-out call to a phone number.
778
+
779
+ Args:
780
+ settings: Dial-out configuration settings.
781
+ """
782
+ logger.debug(f"Starting dialout: settings={settings}")
783
+
784
+ future = self._get_event_loop().create_future()
785
+ self._client.start_dialout(settings, completion=completion_callback(future))
786
+ error = await future
787
+ if error:
788
+ logger.error(f"Unable to start dialout: {error}")
789
+
790
+ async def stop_dialout(self, participant_id):
791
+ """Stop a dial-out call for a specific participant.
792
+
793
+ Args:
794
+ participant_id: ID of the participant to stop dial-out for.
795
+ """
796
+ logger.debug(f"Stopping dialout: participant_id={participant_id}")
797
+
798
+ future = self._get_event_loop().create_future()
799
+ self._client.stop_dialout(participant_id, completion=completion_callback(future))
800
+ error = await future
801
+ if error:
802
+ logger.error(f"Unable to stop dialout: {error}")
803
+
804
+ async def send_dtmf(self, settings):
805
+ """Send DTMF tones during a call.
806
+
807
+ Args:
808
+ settings: DTMF settings including tones and target session.
809
+ """
810
+ session_id = settings.get("sessionId") or self._dial_out_session_id
811
+ if not session_id:
812
+ logger.error("Unable to send DTMF: 'sessionId' is not set")
813
+ return
814
+
815
+ # Update 'sessionId' field.
816
+ settings["sessionId"] = session_id
817
+
818
+ future = self._get_event_loop().create_future()
819
+ self._client.send_dtmf(settings, completion=completion_callback(future))
820
+ await future
821
+
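# Illustrative sketch (not part of the package diff): sending DTMF on an active
# dial-out session. The "tones" key follows Daily's send_dtmf settings; treat the
# exact schema as an assumption and check daily-python's docs for your version.
async def press_digits(client: "DailyTransportClient", digits: str = "1234#"):
    # sessionId is omitted, so the client falls back to the active dial-out session.
    await client.send_dtmf({"tones": digits})
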
822
+ async def sip_call_transfer(self, settings):
823
+ """Transfer a SIP call to another destination.
824
+
825
+ Args:
826
+ settings: SIP call transfer settings.
827
+ """
828
+ future = self._get_event_loop().create_future()
829
+ self._client.sip_call_transfer(settings, completion=completion_callback(future))
830
+ await future
831
+
832
+ async def sip_refer(self, settings):
833
+ """Send a SIP REFER request.
834
+
835
+ Args:
836
+ settings: SIP REFER settings.
837
+ """
838
+ future = self._get_event_loop().create_future()
839
+ self._client.sip_refer(settings, completion=completion_callback(future))
840
+ await future
841
+
842
+ async def start_recording(self, streaming_settings, stream_id, force_new):
843
+ """Start recording the call.
844
+
845
+ Args:
846
+ streaming_settings: Recording configuration settings.
847
+ stream_id: Unique identifier for the recording stream.
848
+ force_new: Whether to force a new recording session.
849
+ """
850
+ logger.debug(
851
+ f"Starting recording: stream_id={stream_id} force_new={force_new} settings={streaming_settings}"
852
+ )
853
+
854
+ future = self._get_event_loop().create_future()
855
+ self._client.start_recording(
856
+ streaming_settings, stream_id, force_new, completion=completion_callback(future)
857
+ )
858
+ error = await future
859
+ if error:
860
+ logger.error(f"Unable to start recording: {error}")
861
+
862
+ async def stop_recording(self, stream_id):
863
+ """Stop recording the call.
864
+
865
+ Args:
866
+ stream_id: Unique identifier for the recording stream to stop.
867
+ """
868
+ logger.debug(f"Stopping recording: stream_id={stream_id}")
869
+
870
+ future = self._get_event_loop().create_future()
871
+ self._client.stop_recording(stream_id, completion=completion_callback(future))
872
+ error = await future
873
+ if error:
874
+ logger.error(f"Unable to stop recording: {error}")
875
+
876
+ async def start_transcription(self, settings):
877
+ """Start transcription for the call.
878
+
879
+ Args:
880
+ settings: Transcription configuration settings.
881
+ """
882
+ if not self._token:
883
+ logger.warning("Transcription can't be started without a room token")
884
+ return
885
+
886
+ logger.debug(f"Starting transcription: settings={settings}")
887
+
888
+ future = self._get_event_loop().create_future()
889
+ self._client.start_transcription(
890
+ settings=settings.model_dump(exclude_none=True),
891
+ completion=completion_callback(future),
892
+ )
893
+ error = await future
894
+ if error:
895
+ logger.error(f"Unable to start transcription: {error}")
896
+
897
+ async def stop_transcription(self):
898
+ """Stop transcription for the call."""
899
+ if not self._token:
900
+ return
901
+
902
+ logger.debug(f"Stopping transcription")
903
+
904
+ future = self._get_event_loop().create_future()
905
+ self._client.stop_transcription(completion=completion_callback(future))
906
+ error = await future
907
+ if error:
908
+ logger.error(f"Unable to stop transcription: {error}")
909
+
910
+ async def send_prebuilt_chat_message(self, message: str, user_name: Optional[str] = None):
911
+ """Send a chat message to Daily's Prebuilt main room.
912
+
913
+ Args:
914
+ message: The chat message to send.
915
+ user_name: Optional user name that will appear as sender of the message.
916
+ """
917
+ if not self._joined:
918
+ return
919
+
920
+ future = self._get_event_loop().create_future()
921
+ self._client.send_prebuilt_chat_message(
922
+ message, user_name=user_name, completion=completion_callback(future)
923
+ )
924
+ await future
925
+
926
+ async def capture_participant_transcription(self, participant_id: str):
927
+ """Enable transcription capture for a specific participant.
928
+
929
+ Args:
930
+ participant_id: ID of the participant to capture transcription for.
931
+ """
932
+ if not self._params.transcription_enabled:
933
+ return
934
+
935
+ self._transcription_ids.append(participant_id)
936
+ if self._joined and self._transcription_status:
937
+ await self.update_transcription(self._transcription_ids)
938
+
939
+ async def capture_participant_audio(
940
+ self,
941
+ participant_id: str,
942
+ callback: Callable,
943
+ audio_source: str = "microphone",
944
+ sample_rate: int = 16000,
945
+ callback_interval_ms: int = 20,
946
+ ):
947
+ """Capture audio from a specific participant.
948
+
949
+ Args:
950
+ participant_id: ID of the participant to capture audio from.
951
+ callback: Callback function to handle audio data.
952
+ audio_source: Audio source to capture (microphone, screenAudio, or custom).
953
+ sample_rate: Desired sample rate for audio capture.
954
+ callback_interval_ms: Interval between audio callbacks in milliseconds.
955
+ """
956
+ # Only enable the desired audio source subscription on this participant.
957
+ if audio_source in ("microphone", "screenAudio"):
958
+ media = {"media": {audio_source: "subscribed"}}
959
+ else:
960
+ media = {"media": {"customAudio": {audio_source: "subscribed"}}}
961
+
962
+ await self.update_subscriptions(participant_settings={participant_id: media})
963
+
964
+ self._audio_renderers.setdefault(participant_id, {})[audio_source] = callback
965
+
966
+ logger.debug(
967
+ f"Starting to capture [{audio_source}] audio from participant {participant_id}"
968
+ )
969
+
970
+ self._client.set_audio_renderer(
971
+ participant_id,
972
+ self._audio_data_received,
973
+ audio_source=audio_source,
974
+ sample_rate=sample_rate,
975
+ callback_interval_ms=callback_interval_ms,
976
+ )
977
+
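# Illustrative sketch (not part of the package diff): a per-participant audio
# callback. It receives the same arguments _audio_data_received forwards below:
# (participant_id, AudioData, audio_source). The AudioData attribute used here
# (audio_frames, daily-python's raw PCM payload) is an assumption to verify
# against your daily-python version.
async def on_audio(participant_id: str, audio: "AudioData", audio_source: str):
    pcm: bytes = audio.audio_frames
    print(f"{participant_id}/{audio_source}: {len(pcm)} bytes")

# await client.capture_participant_audio(participant_id, on_audio, sample_rate=16000)
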
978
+ async def capture_participant_video(
979
+ self,
980
+ participant_id: str,
981
+ callback: Callable,
982
+ framerate: int = 30,
983
+ video_source: str = "camera",
984
+ color_format: str = "RGB",
985
+ ):
986
+ """Capture video from a specific participant.
987
+
988
+ Args:
989
+ participant_id: ID of the participant to capture video from.
990
+ callback: Callback function to handle video frames.
991
+ framerate: Desired framerate for video capture.
992
+ video_source: Video source to capture (camera, screenVideo, or custom).
993
+ color_format: Color format for video frames.
994
+ """
995
+ # Only enable the desired video source subscription on this participant.
996
+ if video_source in ("camera", "screenVideo"):
997
+ media = {"media": {video_source: "subscribed"}}
998
+ else:
999
+ media = {"media": {"customVideo": {video_source: "subscribed"}}}
1000
+
1001
+ await self.update_subscriptions(participant_settings={participant_id: media})
1002
+
1003
+ self._video_renderers.setdefault(participant_id, {})[video_source] = callback
1004
+
1005
+ logger.debug(
1006
+ f"Starting to capture [{video_source}] video from participant {participant_id}"
1007
+ )
1008
+
1009
+ self._client.set_video_renderer(
1010
+ participant_id,
1011
+ self._video_frame_received,
1012
+ video_source=video_source,
1013
+ color_format=color_format,
1014
+ )
1015
+
1016
+ async def add_custom_audio_track(self, track_name: str) -> DailyAudioTrack:
1017
+ """Add a custom audio track for multi-stream output.
1018
+
1019
+ Args:
1020
+ track_name: Name for the custom audio track.
1021
+
1022
+ Returns:
1023
+ The created DailyAudioTrack instance.
1024
+ """
1025
+ future = self._get_event_loop().create_future()
1026
+
1027
+ audio_source = CustomAudioSource(self._out_sample_rate, 1)
1028
+
1029
+ audio_track = CustomAudioTrack(audio_source)
1030
+
1031
+ self._client.add_custom_audio_track(
1032
+ track_name=track_name,
1033
+ audio_track=audio_track,
1034
+ ignore_audio_level=True,
1035
+ completion=completion_callback(future),
1036
+ )
1037
+
1038
+ await future
1039
+
1040
+ track = DailyAudioTrack(source=audio_source, track=audio_track)
1041
+
1042
+ return track
1043
+
1044
+ async def remove_custom_audio_track(self, track_name: str):
1045
+ """Remove a custom audio track.
1046
+
1047
+ Args:
1048
+ track_name: Name of the custom audio track to remove.
1049
+ """
1050
+ future = self._get_event_loop().create_future()
1051
+ self._client.remove_custom_audio_track(
1052
+ track_name=track_name,
1053
+ completion=completion_callback(future),
1054
+ )
1055
+ await future
1056
+
1057
+ async def update_transcription(self, participants=None, instance_id=None):
1058
+ """Update transcription settings for specific participants.
1059
+
1060
+ Args:
1061
+ participants: List of participant IDs to enable transcription for.
1062
+ instance_id: Optional transcription instance ID.
1063
+ """
1064
+ future = self._get_event_loop().create_future()
1065
+ self._client.update_transcription(
1066
+ participants, instance_id, completion=completion_callback(future)
1067
+ )
1068
+ await future
1069
+
1070
+ async def update_subscriptions(self, participant_settings=None, profile_settings=None):
1071
+ """Update media subscription settings.
1072
+
1073
+ Args:
1074
+ participant_settings: Per-participant subscription settings.
1075
+ profile_settings: Global subscription profile settings.
1076
+ """
1077
+ future = self._get_event_loop().create_future()
1078
+ self._client.update_subscriptions(
1079
+ participant_settings=participant_settings,
1080
+ profile_settings=profile_settings,
1081
+ completion=completion_callback(future),
1082
+ )
1083
+ await future
1084
+
1085
+ async def update_publishing(self, publishing_settings: Mapping[str, Any]):
1086
+ """Update media publishing settings.
1087
+
1088
+ Args:
1089
+ publishing_settings: Publishing configuration settings.
1090
+ """
1091
+ future = self._get_event_loop().create_future()
1092
+ self._client.update_publishing(
1093
+ publishing_settings=publishing_settings,
1094
+ completion=completion_callback(future),
1095
+ )
1096
+ await future
1097
+
1098
+ async def update_remote_participants(self, remote_participants: Mapping[str, Any]):
1099
+ """Update settings for remote participants.
1100
+
1101
+ Args:
1102
+ remote_participants: Remote participant configuration settings.
1103
+ """
1104
+ future = self._get_event_loop().create_future()
1105
+ self._client.update_remote_participants(
1106
+ remote_participants=remote_participants, completion=completion_callback(future)
1107
+ )
1108
+ await future
1109
+
1110
+ #
1111
+ #
1112
+ # Daily (EventHandler)
1113
+ #
1114
+
1115
+ def on_active_speaker_changed(self, participant):
1116
+ """Handle active speaker change events.
1117
+
1118
+ Args:
1119
+ participant: The new active speaker participant info.
1120
+ """
1121
+ self._call_event_callback(self._callbacks.on_active_speaker_changed, participant)
1122
+
1123
+ def on_app_message(self, message: Any, sender: str):
1124
+ """Handle application message events.
1125
+
1126
+ Args:
1127
+ message: The received message data.
1128
+ sender: ID of the message sender.
1129
+ """
1130
+ self._call_event_callback(self._callbacks.on_app_message, message, sender)
1131
+
1132
+ def on_call_state_updated(self, state: str):
1133
+ """Handle call state update events.
1134
+
1135
+ Args:
1136
+ state: The new call state.
1137
+ """
1138
+ self._call_event_callback(self._callbacks.on_call_state_updated, state)
1139
+
1140
+ def on_dialin_connected(self, data: Any):
1141
+ """Handle dial-in connected events.
1142
+
1143
+ Args:
1144
+ data: Dial-in connection data.
1145
+ """
1146
+ self._call_event_callback(self._callbacks.on_dialin_connected, data)
1147
+
1148
+ def on_dialin_ready(self, sip_endpoint: str):
1149
+ """Handle dial-in ready events.
1150
+
1151
+ Args:
1152
+ sip_endpoint: The SIP endpoint for dial-in.
1153
+ """
1154
+ self._call_event_callback(self._callbacks.on_dialin_ready, sip_endpoint)
1155
+
1156
+ def on_dialin_stopped(self, data: Any):
1157
+ """Handle dial-in stopped events.
1158
+
1159
+ Args:
1160
+ data: Dial-in stop data.
1161
+ """
1162
+ self._call_event_callback(self._callbacks.on_dialin_stopped, data)
1163
+
1164
+ def on_dialin_error(self, data: Any):
1165
+ """Handle dial-in error events.
1166
+
1167
+ Args:
1168
+ data: Dial-in error data.
1169
+ """
1170
+ self._call_event_callback(self._callbacks.on_dialin_error, data)
1171
+
1172
+ def on_dialin_warning(self, data: Any):
1173
+ """Handle dial-in warning events.
1174
+
1175
+ Args:
1176
+ data: Dial-in warning data.
1177
+ """
1178
+ self._call_event_callback(self._callbacks.on_dialin_warning, data)
1179
+
1180
+ def on_dialout_answered(self, data: Any):
1181
+ """Handle dial-out answered events.
1182
+
1183
+ Args:
1184
+ data: Dial-out answered data.
1185
+ """
1186
+ self._call_event_callback(self._callbacks.on_dialout_answered, data)
1187
+
1188
+ def on_dialout_connected(self, data: Any):
1189
+ """Handle dial-out connected events.
1190
+
1191
+ Args:
1192
+ data: Dial-out connection data.
1193
+ """
1194
+ self._dial_out_session_id = data.get("sessionId", "")
1195
+ self._call_event_callback(self._callbacks.on_dialout_connected, data)
1196
+
1197
+ def on_dialout_stopped(self, data: Any):
1198
+ """Handle dial-out stopped events.
1199
+
1200
+ Args:
1201
+ data: Dial-out stop data.
1202
+ """
1203
+ # Cleanup only if our session stopped.
1204
+ if data["sessionId"] == self._dial_out_session_id:
1205
+ self._dial_out_session_id = ""
1206
+ self._call_event_callback(self._callbacks.on_dialout_stopped, data)
1207
+
1208
+ def on_dialout_error(self, data: Any):
1209
+ """Handle dial-out error events.
1210
+
1211
+ Args:
1212
+ data: Dial-out error data.
1213
+ """
1214
+ # Cleanup only if our session errored out.
1215
+ if data["sessionId"] == self._dial_out_session_id:
1216
+ self._dial_out_session_id = ""
1217
+ self._call_event_callback(self._callbacks.on_dialout_error, data)
1218
+
1219
+ def on_dialout_warning(self, data: Any):
1220
+ """Handle dial-out warning events.
1221
+
1222
+ Args:
1223
+ data: Dial-out warning data.
1224
+ """
1225
+ self._call_event_callback(self._callbacks.on_dialout_warning, data)
1226
+
1227
+ def on_participant_joined(self, participant):
1228
+ """Handle participant joined events.
1229
+
1230
+ Args:
1231
+ participant: The participant that joined.
1232
+ """
1233
+ self._call_event_callback(self._callbacks.on_participant_joined, participant)
1234
+
1235
+ def on_participant_left(self, participant, reason):
1236
+ """Handle participant left events.
1237
+
1238
+ Args:
1239
+ participant: The participant that left.
1240
+ reason: Reason for leaving.
1241
+ """
1242
+ self._call_event_callback(self._callbacks.on_participant_left, participant, reason)
1243
+
1244
+ def on_participant_updated(self, participant):
1245
+ """Handle participant updated events.
1246
+
1247
+ Args:
1248
+ participant: The updated participant info.
1249
+ """
1250
+ self._call_event_callback(self._callbacks.on_participant_updated, participant)
1251
+
1252
+ def on_transcription_started(self, status):
1253
+ """Handle transcription started events.
1254
+
1255
+ Args:
1256
+ status: Transcription start status.
1257
+ """
1258
+ logger.debug(f"Transcription started: {status}")
1259
+ self._transcription_status = status
1260
+ self._call_event_callback(self.update_transcription, self._transcription_ids)
1261
+
1262
+ def on_transcription_stopped(self, stopped_by, stopped_by_error):
1263
+ """Handle transcription stopped events.
1264
+
1265
+ Args:
1266
+ stopped_by: Who stopped the transcription.
1267
+ stopped_by_error: Whether stopped due to error.
1268
+ """
1269
+ logger.debug("Transcription stopped")
1270
+ self._call_event_callback(
1271
+ self._callbacks.on_transcription_stopped, stopped_by, stopped_by_error
1272
+ )
1273
+
1274
+ def on_transcription_error(self, message):
1275
+ """Handle transcription error events.
1276
+
1277
+ Args:
1278
+ message: Error message.
1279
+ """
1280
+ logger.error(f"Transcription error: {message}")
1281
+ self._call_event_callback(self._callbacks.on_transcription_error, message)
1282
+
1283
+ def on_transcription_message(self, message):
1284
+ """Handle transcription message events.
1285
+
1286
+ Args:
1287
+ message: The transcription message data.
1288
+ """
1289
+ self._call_event_callback(self._callbacks.on_transcription_message, message)
1290
+
1291
+ def on_recording_started(self, status):
1292
+ """Handle recording started events.
1293
+
1294
+ Args:
1295
+ status: Recording start status.
1296
+ """
1297
+ logger.debug(f"Recording started: {status}")
1298
+ self._call_event_callback(self._callbacks.on_recording_started, status)
1299
+
1300
+ def on_recording_stopped(self, stream_id):
1301
+ """Handle recording stopped events.
1302
+
1303
+ Args:
1304
+ stream_id: ID of the stopped recording stream.
1305
+ """
1306
+ logger.debug(f"Recording stopped: {stream_id}")
1307
+ self._call_event_callback(self._callbacks.on_recording_stopped, stream_id)
1308
+
1309
+ def on_recording_error(self, stream_id, message):
1310
+ """Handle recording error events.
1311
+
1312
+ Args:
1313
+ stream_id: ID of the recording stream with error.
1314
+ message: Error message.
1315
+ """
1316
+ logger.error(f"Recording error for {stream_id}: {message}")
1317
+ self._call_event_callback(self._callbacks.on_recording_error, stream_id, message)
1318
+
1319
+ #
1320
+ # Daily (CallClient callbacks)
1321
+ #
1322
+
1323
+ def _audio_data_received(self, participant_id: str, audio_data: AudioData, audio_source: str):
1324
+ """Handle received audio data from participants."""
1325
+ callback = self._audio_renderers[participant_id][audio_source]
1326
+ self._call_audio_callback(callback, participant_id, audio_data, audio_source)
1327
+
1328
+ def _video_frame_received(
1329
+ self, participant_id: str, video_frame: VideoFrame, video_source: str
1330
+ ):
1331
+ """Handle received video frames from participants."""
1332
+ callback = self._video_renderers[participant_id][video_source]
1333
+ self._call_video_callback(callback, participant_id, video_frame, video_source)
1334
+
1335
+ #
1336
+ # Queue callbacks handling
1337
+ #
1338
+
1339
+ def _call_audio_callback(self, callback, *args):
1340
+ """Queue an audio callback for async execution."""
1341
+ self._call_async_callback(self._audio_queue, callback, *args)
1342
+
1343
+ def _call_video_callback(self, callback, *args):
1344
+ """Queue a video callback for async execution."""
1345
+ self._call_async_callback(self._video_queue, callback, *args)
1346
+
1347
+ def _call_event_callback(self, callback, *args):
1348
+ """Queue an event callback for async execution."""
1349
+ self._call_async_callback(self._event_queue, callback, *args)
1350
+
1351
+ def _call_async_callback(self, queue: asyncio.Queue, callback, *args):
1352
+ """Queue a callback for async execution on the event loop."""
1353
+ try:
1354
+ future = asyncio.run_coroutine_threadsafe(
1355
+ queue.put((callback, *args)), self._get_event_loop()
1356
+ )
1357
+ future.result()
1358
+ except FuturesCancelledError:
1359
+ pass
1360
+
1361
+ async def _callback_task_handler(self, queue: asyncio.Queue):
1362
+ """Handle queued callbacks from the specified queue."""
1363
+ while True:
1364
+ # Wait to process any callback until we are joined.
1365
+ await self._joined_event.wait()
1366
+ (callback, *args) = await queue.get()
1367
+ await callback(*args)
1368
+ queue.task_done()
1369
+
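# Illustrative sketch (not part of the package diff): the thread-to-asyncio handoff
# pattern used above, reduced to its essentials. daily-python fires callbacks on its
# own threads while holding the GIL, so handlers are queued and awaited from a task
# rather than executed inline.
import asyncio

async def _consumer(queue: asyncio.Queue):
    while True:
        callback, *args = await queue.get()
        await callback(*args)
        queue.task_done()

def _producer_from_daily_thread(queue: asyncio.Queue, loop: asyncio.AbstractEventLoop, cb, *args):
    # Safe to call from a non-asyncio thread; mirrors _call_async_callback().
    asyncio.run_coroutine_threadsafe(queue.put((cb, *args)), loop).result()
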
1370
+ def _get_event_loop(self) -> asyncio.AbstractEventLoop:
1371
+ """Get the event loop from the task manager."""
1372
+ if not self._task_manager:
1373
+ raise Exception(f"{self}: missing task manager (pipeline not started?)")
1374
+ return self._task_manager.get_event_loop()
1375
+
1376
+ def __str__(self):
1377
+ """String representation of the DailyTransportClient."""
1378
+ return f"{self._transport_name}::DailyTransportClient"
1379
+
1380
+
1381
+ class DailyInputTransport(BaseInputTransport):
1382
+ """Handles incoming media streams and events from Daily calls.
1383
+
1384
+ Processes incoming audio, video, transcriptions and other events from Daily
1385
+ room participants, including participant media capture and event forwarding.
1386
+ """
1387
+
1388
+ def __init__(
1389
+ self,
1390
+ transport: BaseTransport,
1391
+ client: DailyTransportClient,
1392
+ params: DailyParams,
1393
+ **kwargs,
1394
+ ):
1395
+ """Initialize the Daily input transport.
1396
+
1397
+ Args:
1398
+ transport: The parent transport instance.
1399
+ client: DailyTransportClient instance.
1400
+ params: Configuration parameters.
1401
+ **kwargs: Additional arguments passed to parent class.
1402
+ """
1403
+ super().__init__(params, **kwargs)
1404
+
1405
+ self._transport = transport
1406
+ self._client = client
1407
+ self._params = params
1408
+
1409
+ self._video_renderers = {}
1410
+
1411
+ # Whether we have seen a StartFrame already.
1412
+ self._initialized = False
1413
+
1414
+ # Whether we have started audio streaming.
1415
+ self._streaming_started = False
1416
+
1417
+ # Store the list of participants we should stream. This is necessary in
1418
+ # case we don't start streaming right away.
1419
+ self._capture_participant_audio = []
1420
+
1421
+ # Audio task when using a virtual speaker (i.e. no user tracks).
1422
+ self._audio_in_task: Optional[asyncio.Task] = None
1423
+
1424
+ self._vad_analyzer: Optional[VADAnalyzer] = params.vad_analyzer
1425
+
1426
+ @property
1427
+ def vad_analyzer(self) -> Optional[VADAnalyzer]:
1428
+ """Get the Voice Activity Detection analyzer.
1429
+
1430
+ Returns:
1431
+ The VAD analyzer instance if configured.
1432
+ """
1433
+ return self._vad_analyzer
1434
+
1435
+ async def start_audio_in_streaming(self):
1436
+ """Start receiving audio from participants."""
1437
+ if not self._params.audio_in_enabled:
1438
+ return
1439
+
1440
+ logger.debug(f"Start receiving audio")
1441
+
1442
+ if self._params.audio_in_enabled:
1443
+ if self._params.audio_in_user_tracks:
1444
+ # Capture individual participant tracks.
1445
+ for participant_id, audio_source, sample_rate in self._capture_participant_audio:
1446
+ await self._client.capture_participant_audio(
1447
+ participant_id, self._on_participant_audio_data, audio_source, sample_rate
1448
+ )
1449
+ elif not self._audio_in_task:
1450
+ # Create audio task. It reads audio frames from a single room
1451
+ # track and pushes them internally for VAD processing.
1452
+ self._audio_in_task = self.create_task(self._audio_in_task_handler())
1453
+
1454
+ self._streaming_started = True
1455
+
1456
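With `audio_in_stream_on_start=False`, the transport joins the room immediately but defers pulling participant audio until `start_audio_in_streaming()` is called; any `capture_participant_audio()` requests made in the meantime are queued in `_capture_participant_audio`. A hedged usage sketch (the import path and the `event_handler` decorator follow pipecat's usual conventions and may differ between versions; room URL and token are placeholders):

```python
from pipecat.transports.services.daily import DailyParams, DailyTransport

transport = DailyTransport(
    room_url="https://example.daily.co/room",
    token=None,
    bot_name="pipecat-bot",
    params=DailyParams(audio_in_enabled=True, audio_in_stream_on_start=False),
)


@transport.event_handler("on_first_participant_joined")
async def on_first_participant_joined(transport, participant):
    # Begin receiving participant audio only once someone has joined.
    await transport.input().start_audio_in_streaming()
```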
+ async def setup(self, setup: FrameProcessorSetup):
1457
+ """Setup the input transport with shared client setup.
1458
+
1459
+ Args:
1460
+ setup: The frame processor setup configuration.
1461
+ """
1462
+ await super().setup(setup)
1463
+ await self._client.setup(setup)
1464
+
1465
+ async def cleanup(self):
1466
+ """Cleanup input transport and shared resources."""
1467
+ await super().cleanup()
1468
+ await self._client.cleanup()
1469
+ await self._transport.cleanup()
1470
+
1471
+ async def start(self, frame: StartFrame):
1472
+ """Start the input transport and join the Daily room.
1473
+
1474
+ Args:
1475
+ frame: The start frame containing initialization parameters.
1476
+ """
1477
+ # Parent start.
1478
+ await super().start(frame)
1479
+
1480
+ if self._initialized:
1481
+ return
1482
+
1483
+ self._initialized = True
1484
+
1485
+ # Set up the client.
1486
+ await self._client.start(frame)
1487
+
1488
+ # Join the room.
1489
+ await self._client.join()
1490
+
1491
+ # Indicate to the transport that we are connected.
1492
+ await self.set_transport_ready(frame)
1493
+
1494
+ if self._params.audio_in_stream_on_start:
1495
+ await self.start_audio_in_streaming()
1496
+
1497
+ async def stop(self, frame: EndFrame):
1498
+ """Stop the input transport and leave the Daily room.
1499
+
1500
+ Args:
1501
+ frame: The end frame signaling transport shutdown.
1502
+ """
1503
+ # Parent stop.
1504
+ await super().stop(frame)
1505
+ # Leave the room.
1506
+ await self._client.leave()
1507
+ # Stop the audio input task.
1508
+ if self._audio_in_task:
1509
+ await self.cancel_task(self._audio_in_task)
1510
+ self._audio_in_task = None
1511
+
1512
+ async def cancel(self, frame: CancelFrame):
1513
+ """Cancel the input transport and leave the Daily room.
1514
+
1515
+ Args:
1516
+ frame: The cancel frame signaling immediate cancellation.
1517
+ """
1518
+ # Parent stop.
1519
+ await super().cancel(frame)
1520
+ # Leave the room.
1521
+ await self._client.leave()
1522
+ # Stop the audio input task.
1523
+ if self._audio_in_task:
1524
+ await self.cancel_task(self._audio_in_task)
1525
+ self._audio_in_task = None
1526
+
1527
+ #
1528
+ # FrameProcessor
1529
+ #
1530
+
1531
+ async def process_frame(self, frame: Frame, direction: FrameDirection):
1532
+ """Process incoming frames, including user image requests.
1533
+
1534
+ Args:
1535
+ frame: The frame to process.
1536
+ direction: The direction of frame flow in the pipeline.
1537
+ """
1538
+ await super().process_frame(frame, direction)
1539
+
1540
+ if isinstance(frame, UserImageRequestFrame):
1541
+ await self.request_participant_image(frame)
1542
+
1543
+ #
1544
+ # Frames
1545
+ #
1546
+
1547
+ async def push_transcription_frame(self, frame: TranscriptionFrame | InterimTranscriptionFrame):
1548
+ """Push a transcription frame downstream.
1549
+
1550
+ Args:
1551
+ frame: The transcription frame to push.
1552
+ """
1553
+ await self.push_frame(frame)
1554
+
1555
+ async def push_app_message(self, message: Any, sender: str):
1556
+ """Push an application message as an urgent transport frame.
1557
+
1558
+ Args:
1559
+ message: The message data to send.
1560
+ sender: ID of the message sender.
1561
+ """
1562
+ frame = DailyInputTransportMessageUrgentFrame(message=message, participant_id=sender)
1563
+ await self.push_frame(frame)
1564
+
1565
+ #
1566
+ # Audio in
1567
+ #
1568
+
1569
+ async def capture_participant_audio(
1570
+ self,
1571
+ participant_id: str,
1572
+ audio_source: str = "microphone",
1573
+ sample_rate: int = 16000,
1574
+ ):
1575
+ """Capture audio from a specific participant.
1576
+
1577
+ Args:
1578
+ participant_id: ID of the participant to capture audio from.
1579
+ audio_source: Audio source to capture from.
1580
+ sample_rate: Desired sample rate for audio capture.
1581
+ """
1582
+ if self._streaming_started:
1583
+ await self._client.capture_participant_audio(
1584
+ participant_id, self._on_participant_audio_data, audio_source, sample_rate
1585
+ )
1586
+ else:
1587
+ self._capture_participant_audio.append((participant_id, audio_source, sample_rate))
1588
+
1589
+ async def _on_participant_audio_data(
1590
+ self, participant_id: str, audio: AudioData, audio_source: str
1591
+ ):
1592
+ """Handle received participant audio data."""
1593
+ frame = UserAudioRawFrame(
1594
+ user_id=participant_id,
1595
+ audio=audio.audio_frames,
1596
+ sample_rate=audio.sample_rate,
1597
+ num_channels=audio.num_channels,
1598
+ )
1599
+ frame.transport_source = audio_source
1600
+ await self.push_audio_frame(frame)
1601
+
1602
+ async def _audio_in_task_handler(self):
1603
+ while True:
1604
+ frame = await self._client.read_next_audio_frame()
1605
+ if frame:
1606
+ await self.push_audio_frame(frame)
1607
+
1608
+ #
1609
+ # Camera in
1610
+ #
1611
+
1612
+ async def capture_participant_video(
1613
+ self,
1614
+ participant_id: str,
1615
+ framerate: int = 30,
1616
+ video_source: str = "camera",
1617
+ color_format: str = "RGB",
1618
+ ):
1619
+ """Capture video from a specific participant.
1620
+
1621
+ Args:
1622
+ participant_id: ID of the participant to capture video from.
1623
+ framerate: Desired framerate for video capture.
1624
+ video_source: Video source to capture from.
1625
+ color_format: Color format for video frames.
1626
+ """
1627
+ if participant_id not in self._video_renderers:
1628
+ self._video_renderers[participant_id] = {}
1629
+
1630
+ self._video_renderers[participant_id][video_source] = {
1631
+ "framerate": framerate,
1632
+ "timestamp": 0,
1633
+ "render_next_frame": [],
1634
+ }
1635
+
1636
+ await self._client.capture_participant_video(
1637
+ participant_id, self._on_participant_video_frame, framerate, video_source, color_format
1638
+ )
1639
+
1640
+ async def request_participant_image(self, frame: UserImageRequestFrame):
1641
+ """Request a video frame from a specific participant.
1642
+
1643
+ Args:
1644
+ frame: The user image request frame.
1645
+ """
1646
+ if frame.user_id in self._video_renderers:
1647
+ video_source = frame.video_source if frame.video_source else "camera"
1648
+ self._video_renderers[frame.user_id][video_source]["render_next_frame"].append(frame)
1649
+
1650
+ async def _on_participant_video_frame(
1651
+ self, participant_id: str, video_frame: VideoFrame, video_source: str
1652
+ ):
1653
+ """Handle received participant video frames."""
1654
+ render_frame = False
1655
+
1656
+ curr_time = time.time()
1657
+ prev_time = self._video_renderers[participant_id][video_source]["timestamp"]
1658
+ framerate = self._video_renderers[participant_id][video_source]["framerate"]
1659
+
1660
+ # Sometimes we render a frame because of an explicit request.
1661
+ request_frame = None
1662
+
1663
+ if framerate > 0:
1664
+ next_time = prev_time + 1 / framerate
1665
+ render_frame = (next_time - curr_time) < 0.1
1666
+
1667
+ if self._video_renderers[participant_id][video_source]["render_next_frame"]:
1668
+ request_frame = self._video_renderers[participant_id][video_source][
1669
+ "render_next_frame"
1670
+ ].pop(0)
1671
+ render_frame = True
1672
+
1673
+ if render_frame:
1674
+ frame = UserImageRawFrame(
1675
+ user_id=participant_id,
1676
+ request=request_frame,
1677
+ image=video_frame.buffer,
1678
+ size=(video_frame.width, video_frame.height),
1679
+ format=video_frame.color_format,
1680
+ )
1681
+ frame.transport_source = video_source
1682
+ await self.push_video_frame(frame)
1683
+ self._video_renderers[participant_id][video_source]["timestamp"] = curr_time
1684
+
1685
+
1686
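Taken together, `capture_participant_video()` with `framerate=0` and `request_participant_image()` give an on-demand snapshot flow: no frames are pushed until a `UserImageRequestFrame` is queued for that participant and source. A hedged sketch (constructor arguments of `UserImageRequestFrame` beyond `user_id` are an assumption):

```python
from pipecat.frames.frames import UserImageRequestFrame


async def request_snapshot(input_transport: DailyInputTransport, participant_id: str):
    # Subscribe to the participant's camera but render nothing continuously.
    await input_transport.capture_participant_video(participant_id, framerate=0)
    # Queue a one-shot request; the next received video frame is pushed as a
    # UserImageRawFrame with `request` set to this frame.
    await input_transport.request_participant_image(
        UserImageRequestFrame(user_id=participant_id)
    )
```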
+ class DailyOutputTransport(BaseOutputTransport):
1687
+ """Handles outgoing media streams and events to Daily calls.
1688
+
1689
+ Manages sending audio, video and other data to Daily calls,
1690
+ including audio destination registration and message transmission.
1691
+ """
1692
+
1693
+ def __init__(
1694
+ self, transport: BaseTransport, client: DailyTransportClient, params: DailyParams, **kwargs
1695
+ ):
1696
+ """Initialize the Daily output transport.
1697
+
1698
+ Args:
1699
+ transport: The parent transport instance.
1700
+ client: DailyTransportClient instance.
1701
+ params: Configuration parameters.
1702
+ **kwargs: Additional arguments passed to parent class.
1703
+ """
1704
+ super().__init__(params, **kwargs)
1705
+
1706
+ self._transport = transport
1707
+ self._client = client
1708
+
1709
+ # Whether we have seen a StartFrame already.
1710
+ self._initialized = False
1711
+
1712
+ async def setup(self, setup: FrameProcessorSetup):
1713
+ """Setup the output transport with shared client setup.
1714
+
1715
+ Args:
1716
+ setup: The frame processor setup configuration.
1717
+ """
1718
+ await super().setup(setup)
1719
+ await self._client.setup(setup)
1720
+
1721
+ async def cleanup(self):
1722
+ """Cleanup output transport and shared resources."""
1723
+ await super().cleanup()
1724
+ await self._client.cleanup()
1725
+ await self._transport.cleanup()
1726
+
1727
+ async def start(self, frame: StartFrame):
1728
+ """Start the output transport and join the Daily room.
1729
+
1730
+ Args:
1731
+ frame: The start frame containing initialization parameters.
1732
+ """
1733
+ # Parent start.
1734
+ await super().start(frame)
1735
+
1736
+ if self._initialized:
1737
+ return
1738
+
1739
+ self._initialized = True
1740
+
1741
+ # Set up the client.
1742
+ await self._client.start(frame)
1743
+
1744
+ # Join the room.
1745
+ await self._client.join()
1746
+
1747
+ # Indicate to the transport that we are connected.
1748
+ await self.set_transport_ready(frame)
1749
+
1750
+ async def stop(self, frame: EndFrame):
1751
+ """Stop the output transport and leave the Daily room.
1752
+
1753
+ Args:
1754
+ frame: The end frame signaling transport shutdown.
1755
+ """
1756
+ # Parent stop.
1757
+ await super().stop(frame)
1758
+ # Leave the room.
1759
+ await self._client.leave()
1760
+
1761
+ async def cancel(self, frame: CancelFrame):
1762
+ """Cancel the output transport and leave the Daily room.
1763
+
1764
+ Args:
1765
+ frame: The cancel frame signaling immediate cancellation.
1766
+ """
1767
+ # Parent stop.
1768
+ await super().cancel(frame)
1769
+ # Leave the room.
1770
+ await self._client.leave()
1771
+
1772
+ async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
1773
+ """Send a transport message to participants.
1774
+
1775
+ Args:
1776
+ frame: The transport message frame to send.
1777
+ """
1778
+ await self._client.send_message(frame)
1779
+
1780
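Because `send_message()` accepts the generic transport message frames, broadcasting data to connected clients is straightforward. A hedged sketch (the `message` payload shape is application-defined):

```python
from pipecat.frames.frames import TransportMessageUrgentFrame


async def notify_clients(output: DailyOutputTransport):
    # Broadcast an application message to every participant in the room.
    await output.send_message(TransportMessageUrgentFrame(message={"type": "bot-ready"}))
```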
+ async def register_video_destination(self, destination: str):
1781
+ """Register a video output destination.
1782
+
1783
+ Args:
1784
+ destination: The destination identifier to register.
1785
+ """
1786
+ logger.warning(f"{self} registering video destinations is not supported yet")
1787
+
1788
+ async def register_audio_destination(self, destination: str):
1789
+ """Register an audio output destination.
1790
+
1791
+ Args:
1792
+ destination: The destination identifier to register.
1793
+ """
1794
+ await self._client.register_audio_destination(destination)
1795
+
1796
+ async def write_audio_frame(self, frame: OutputAudioRawFrame):
1797
+ """Write an audio frame to the Daily call.
1798
+
1799
+ Args:
1800
+ frame: The audio frame to write.
1801
+ """
1802
+ await self._client.write_audio_frame(frame)
1803
+
1804
+ async def write_video_frame(self, frame: OutputImageRawFrame):
1805
+ """Write a video frame to the Daily call.
1806
+
1807
+ Args:
1808
+ frame: The video frame to write.
1809
+ """
1810
+ await self._client.write_video_frame(frame)
1811
+
1812
+ def _supports_native_dtmf(self) -> bool:
1813
+ """Daily supports native DTMF via telephone events.
1814
+
1815
+ Returns:
1816
+ True, as Daily supports native DTMF transmission.
1817
+ """
1818
+ return True
1819
+
1820
+ async def _write_dtmf_native(self, frame):
1821
+ """Use Daily's native send_dtmf method for telephone events.
1822
+
1823
+ Args:
1824
+ frame: The DTMF frame to write.
1825
+ """
1826
+ await self._client.send_dtmf(
1827
+ {
1828
+ "sessionId": frame.transport_destination,
1829
+ "tones": frame.button.value,
1830
+ }
1831
+ )
1832
+
1833
+
1834
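Since the output transport reports native DTMF support, a DTMF frame queued into the pipeline ends up in `_write_dtmf_native()`, which maps `transport_destination` to the dial-out session and `button.value` to the tone. A hedged sketch (the `OutputDTMFUrgentFrame`/`KeypadEntry` names and import location follow pipecat's DTMF types and are assumptions here; `task` is an already-running `PipelineTask`):

```python
from pipecat.frames.frames import KeypadEntry, OutputDTMFUrgentFrame


async def press_one(task, dialout_session_id: str):
    frame = OutputDTMFUrgentFrame(button=KeypadEntry.ONE)
    # Route the tone to a specific Daily dial-out session.
    frame.transport_destination = dialout_session_id
    await task.queue_frame(frame)
```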
+ class DailyTransport(BaseTransport):
1835
+ """Transport implementation for Daily audio and video calls.
1836
+
1837
+ Provides comprehensive Daily integration including audio/video streaming,
1838
+ transcription, recording, dial-in/out functionality, and real-time communication
1839
+ features for conversational AI applications.
1840
+ """
1841
+
1842
+ def __init__(
1843
+ self,
1844
+ room_url: str,
1845
+ token: Optional[str],
1846
+ bot_name: str,
1847
+ params: Optional[DailyParams] = None,
1848
+ input_name: Optional[str] = None,
1849
+ output_name: Optional[str] = None,
1850
+ ):
1851
+ """Initialize the Daily transport.
1852
+
1853
+ Args:
1854
+ room_url: URL of the Daily room to connect to.
1855
+ token: Optional authentication token for the room.
1856
+ bot_name: Display name for the bot in the call.
1857
+ params: Configuration parameters for the transport.
1858
+ input_name: Optional name for the input transport.
1859
+ output_name: Optional name for the output transport.
1860
+ """
1861
+ super().__init__(input_name=input_name, output_name=output_name)
1862
+
1863
+ callbacks = DailyCallbacks(
1864
+ on_active_speaker_changed=self._on_active_speaker_changed,
1865
+ on_joined=self._on_joined,
1866
+ on_left=self._on_left,
1867
+ on_error=self._on_error,
1868
+ on_app_message=self._on_app_message,
1869
+ on_call_state_updated=self._on_call_state_updated,
1870
+ on_client_connected=self._on_client_connected,
1871
+ on_client_disconnected=self._on_client_disconnected,
1872
+ on_dialin_connected=self._on_dialin_connected,
1873
+ on_dialin_ready=self._on_dialin_ready,
1874
+ on_dialin_stopped=self._on_dialin_stopped,
1875
+ on_dialin_error=self._on_dialin_error,
1876
+ on_dialin_warning=self._on_dialin_warning,
1877
+ on_dialout_answered=self._on_dialout_answered,
1878
+ on_dialout_connected=self._on_dialout_connected,
1879
+ on_dialout_stopped=self._on_dialout_stopped,
1880
+ on_dialout_error=self._on_dialout_error,
1881
+ on_dialout_warning=self._on_dialout_warning,
1882
+ on_participant_joined=self._on_participant_joined,
1883
+ on_participant_left=self._on_participant_left,
1884
+ on_participant_updated=self._on_participant_updated,
1885
+ on_transcription_message=self._on_transcription_message,
1886
+ on_transcription_stopped=self._on_transcription_stopped,
1887
+ on_transcription_error=self._on_transcription_error,
1888
+ on_recording_started=self._on_recording_started,
1889
+ on_recording_stopped=self._on_recording_stopped,
1890
+ on_recording_error=self._on_recording_error,
1891
+ )
1892
+ self._params = params or DailyParams()
1893
+
1894
+ self._client = DailyTransportClient(
1895
+ room_url, token, bot_name, self._params, callbacks, self.name
1896
+ )
1897
+ self._input: Optional[DailyInputTransport] = None
1898
+ self._output: Optional[DailyOutputTransport] = None
1899
+
1900
+ self._other_participant_has_joined = False
1901
+
1902
+ # Register supported handlers. The user will only be able to register
1903
+ # these handlers.
1904
+ self._register_event_handler("on_active_speaker_changed")
1905
+ self._register_event_handler("on_joined")
1906
+ self._register_event_handler("on_left")
1907
+ self._register_event_handler("on_error")
1908
+ self._register_event_handler("on_app_message")
1909
+ self._register_event_handler("on_call_state_updated")
1910
+ self._register_event_handler("on_client_connected")
1911
+ self._register_event_handler("on_client_disconnected")
1912
+ self._register_event_handler("on_dialin_connected")
1913
+ self._register_event_handler("on_dialin_ready")
1914
+ self._register_event_handler("on_dialin_stopped")
1915
+ self._register_event_handler("on_dialin_error")
1916
+ self._register_event_handler("on_dialin_warning")
1917
+ self._register_event_handler("on_dialout_answered")
1918
+ self._register_event_handler("on_dialout_connected")
1919
+ self._register_event_handler("on_dialout_stopped")
1920
+ self._register_event_handler("on_dialout_error")
1921
+ self._register_event_handler("on_dialout_warning")
1922
+ self._register_event_handler("on_first_participant_joined")
1923
+ self._register_event_handler("on_participant_joined")
1924
+ self._register_event_handler("on_participant_left")
1925
+ self._register_event_handler("on_participant_updated")
1926
+ self._register_event_handler("on_transcription_message")
1927
+ self._register_event_handler("on_recording_started")
1928
+ self._register_event_handler("on_recording_stopped")
1929
+ self._register_event_handler("on_recording_error")
1930
+
1931
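Only the handlers registered above can be subscribed to, and their signatures mirror the arguments passed to `_call_event_handler()` with the transport itself prepended. A hedged sketch using pipecat's `event_handler` decorator (`transport` is an already-constructed `DailyTransport`):

```python
from loguru import logger


@transport.event_handler("on_app_message")
async def on_app_message(transport, message, sender):
    logger.debug(f"App message from {sender}: {message}")


@transport.event_handler("on_participant_left")
async def on_participant_left(transport, participant, reason):
    logger.info(f"Participant {participant['id']} left ({reason})")
```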
+ #
1932
+ # BaseTransport
1933
+ #
1934
+
1935
+ def input(self) -> DailyInputTransport:
1936
+ """Get the input transport for receiving media and events.
1937
+
1938
+ Returns:
1939
+ The Daily input transport instance.
1940
+ """
1941
+ if not self._input:
1942
+ self._input = DailyInputTransport(
1943
+ self, self._client, self._params, name=self._input_name
1944
+ )
1945
+ return self._input
1946
+
1947
+ def output(self) -> DailyOutputTransport:
1948
+ """Get the output transport for sending media and events.
1949
+
1950
+ Returns:
1951
+ The Daily output transport instance.
1952
+ """
1953
+ if not self._output:
1954
+ self._output = DailyOutputTransport(
1955
+ self, self._client, self._params, name=self._output_name
1956
+ )
1957
+ return self._output
1958
+
1959
+ #
1960
+ # DailyTransport
1961
+ #
1962
+
1963
+ @property
1964
+ def room_url(self) -> str:
1965
+ """Get the Daily room URL.
1966
+
1967
+ Returns:
1968
+ The room URL this transport is connected to.
1969
+ """
1970
+ return self._client.room_url
1971
+
1972
+ @property
1973
+ def participant_id(self) -> str:
1974
+ """Get the participant ID for this transport.
1975
+
1976
+ Returns:
1977
+ The participant ID assigned by Daily.
1978
+ """
1979
+ return self._client.participant_id
1980
+
1981
+ def set_log_level(self, level: DailyLogLevel):
1982
+ """Set the logging level for Daily's internal logging system.
1983
+
1984
+ Args:
1985
+ level: The log level to set. Should be a member of the DailyLogLevel enum,
1986
+ such as DailyLogLevel.Info, DailyLogLevel.Debug, etc.
1987
+
1988
+ Example:
1989
+ transport.set_log_level(DailyLogLevel.Info)
1990
+ """
1991
+ Daily.set_log_level(level)
1992
+
1993
+ async def send_image(self, frame: OutputImageRawFrame | SpriteFrame):
1994
+ """Send an image frame to the Daily call.
1995
+
1996
+ Args:
1997
+ frame: The image frame to send.
1998
+ """
1999
+ if self._output:
2000
+ await self._output.queue_frame(frame, FrameDirection.DOWNSTREAM)
2001
+
2002
+ async def send_audio(self, frame: OutputAudioRawFrame):
2003
+ """Send an audio frame to the Daily call.
2004
+
2005
+ Args:
2006
+ frame: The audio frame to send.
2007
+ """
2008
+ if self._output:
2009
+ await self._output.queue_frame(frame, FrameDirection.DOWNSTREAM)
2010
+
2011
+ def participants(self):
2012
+ """Get current participants in the room.
2013
+
2014
+ Returns:
2015
+ Dictionary of participants keyed by participant ID.
2016
+ """
2017
+ return self._client.participants()
2018
+
2019
+ def participant_counts(self):
2020
+ """Get participant count information.
2021
+
2022
+ Returns:
2023
+ Dictionary with participant count details.
2024
+ """
2025
+ return self._client.participant_counts()
2026
+
2027
+ async def start_dialout(self, settings=None):
2028
+ """Start a dial-out call to a phone number.
2029
+
2030
+ Args:
2031
+ settings: Dial-out configuration settings.
2032
+ """
2033
+ await self._client.start_dialout(settings)
2034
+
2035
+ async def stop_dialout(self, participant_id):
2036
+ """Stop a dial-out call for a specific participant.
2037
+
2038
+ Args:
2039
+ participant_id: ID of the participant to stop dial-out for.
2040
+ """
2041
+ await self._client.stop_dialout(participant_id)
2042
+
2043
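A hedged dial-out sketch; the settings keys (`phoneNumber`, `displayName`) follow Daily's dial-out API and should be verified against Daily's documentation for your domain (`transport` is an already-constructed `DailyTransport`):

```python
from loguru import logger


@transport.event_handler("on_joined")
async def on_joined(transport, data):
    # Place an outbound PSTN call once the bot has joined the room.
    await transport.start_dialout({"phoneNumber": "+15551234567", "displayName": "pipecat-bot"})


@transport.event_handler("on_dialout_answered")
async def on_dialout_answered(transport, data):
    logger.debug(f"Dial-out answered: {data}")
```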
+ async def sip_call_transfer(self, settings):
2044
+ """Transfer a SIP call to another destination.
2045
+
2046
+ Args:
2047
+ settings: SIP call transfer settings.
2048
+ """
2049
+ await self._client.sip_call_transfer(settings)
2050
+
2051
+ async def sip_refer(self, settings):
2052
+ """Send a SIP REFER request.
2053
+
2054
+ Args:
2055
+ settings: SIP REFER settings.
2056
+ """
2057
+ await self._client.sip_refer(settings)
2058
+
2059
+ async def start_recording(self, streaming_settings=None, stream_id=None, force_new=None):
2060
+ """Start recording the call.
2061
+
2062
+ Args:
2063
+ streaming_settings: Recording configuration settings.
2064
+ stream_id: Unique identifier for the recording stream.
2065
+ force_new: Whether to force a new recording session.
2066
+ """
2067
+ await self._client.start_recording(streaming_settings, stream_id, force_new)
2068
+
2069
+ async def stop_recording(self, stream_id=None):
2070
+ """Stop recording the call.
2071
+
2072
+ Args:
2073
+ stream_id: Unique identifier for the recording stream to stop.
2074
+ """
2075
+ await self._client.stop_recording(stream_id)
2076
+
2077
+ async def start_transcription(self, settings=None):
2078
+ """Start transcription for the call.
2079
+
2080
+ Args:
2081
+ settings: Transcription configuration settings.
2082
+ """
2083
+ await self._client.start_transcription(settings)
2084
+
2085
+ async def stop_transcription(self):
2086
+ """Stop transcription for the call."""
2087
+ await self._client.stop_transcription()
2088
+
2089
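A hedged sketch of enabling both once the call is active; the transcription settings dictionary follows Daily's (Deepgram-style) transcription options and is illustrative only:

```python
async def begin_capture(transport: DailyTransport):
    # Call from an event handler, e.g. on_first_participant_joined.
    await transport.start_recording()
    await transport.start_transcription({"language": "en", "model": "nova-2"})
```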
+ async def send_prebuilt_chat_message(self, message: str, user_name: Optional[str] = None):
2090
+ """Send a chat message to Daily's Prebuilt main room.
2091
+
2092
+ Args:
2093
+ message: The chat message to send.
2094
+ user_name: Optional user name that will appear as sender of the message.
2095
+ """
2096
+ await self._client.send_prebuilt_chat_message(message, user_name)
2097
+
2098
+ async def capture_participant_transcription(self, participant_id: str):
2099
+ """Enable transcription capture for a specific participant.
2100
+
2101
+ Args:
2102
+ participant_id: ID of the participant to capture transcription for.
2103
+ """
2104
+ await self._client.capture_participant_transcription(participant_id)
2105
+
2106
+ async def capture_participant_audio(
2107
+ self,
2108
+ participant_id: str,
2109
+ audio_source: str = "microphone",
2110
+ sample_rate: int = 16000,
2111
+ ):
2112
+ """Capture audio from a specific participant.
2113
+
2114
+ Args:
2115
+ participant_id: ID of the participant to capture audio from.
2116
+ audio_source: Audio source to capture from.
2117
+ sample_rate: Desired sample rate for audio capture.
2118
+ """
2119
+ if self._input:
2120
+ await self._input.capture_participant_audio(participant_id, audio_source, sample_rate)
2121
+
2122
+ async def capture_participant_video(
2123
+ self,
2124
+ participant_id: str,
2125
+ framerate: int = 30,
2126
+ video_source: str = "camera",
2127
+ color_format: str = "RGB",
2128
+ ):
2129
+ """Capture video from a specific participant.
2130
+
2131
+ Args:
2132
+ participant_id: ID of the participant to capture video from.
2133
+ framerate: Desired framerate for video capture.
2134
+ video_source: Video source to capture from.
2135
+ color_format: Color format for video frames.
2136
+ """
2137
+ if self._input:
2138
+ await self._input.capture_participant_video(
2139
+ participant_id, framerate, video_source, color_format
2140
+ )
2141
+
2142
+ async def update_publishing(self, publishing_settings: Mapping[str, Any]):
2143
+ """Update media publishing settings.
2144
+
2145
+ Args:
2146
+ publishing_settings: Publishing configuration settings.
2147
+ """
2148
+ await self._client.update_publishing(publishing_settings=publishing_settings)
2149
+
2150
+ async def update_subscriptions(self, participant_settings=None, profile_settings=None):
2151
+ """Update media subscription settings.
2152
+
2153
+ Args:
2154
+ participant_settings: Per-participant subscription settings.
2155
+ profile_settings: Global subscription profile settings.
2156
+ """
2157
+ await self._client.update_subscriptions(
2158
+ participant_settings=participant_settings, profile_settings=profile_settings
2159
+ )
2160
+
2161
+ async def update_remote_participants(self, remote_participants: Mapping[str, Any]):
2162
+ """Update settings for remote participants.
2163
+
2164
+ Args:
2165
+ remote_participants: Remote participant configuration settings.
2166
+ """
2167
+ await self._client.update_remote_participants(remote_participants=remote_participants)
2168
+
2169
+ async def _on_active_speaker_changed(self, participant: Any):
2170
+ """Handle active speaker change events."""
2171
+ await self._call_event_handler("on_active_speaker_changed", participant)
2172
+
2173
+ async def _on_joined(self, data):
2174
+ """Handle room joined events."""
2175
+ await self._call_event_handler("on_joined", data)
2176
+
2177
+ async def _on_left(self):
2178
+ """Handle room left events."""
2179
+ await self._call_event_handler("on_left")
2180
+
2181
+ async def _on_error(self, error):
2182
+ """Handle error events and push error frames."""
2183
+ await self._call_event_handler("on_error", error)
2184
+ # Push error frame to notify the pipeline
2185
+ error_frame = ErrorFrame(error)
2186
+
2187
+ if self._input:
2188
+ await self._input.push_error(error_frame)
2189
+ elif self._output:
2190
+ await self._output.push_error(error_frame)
2191
+ else:
2192
+ logger.error("Both input and output are None while trying to push error")
2193
+ raise Exception("No valid input or output channel to push error")
2194
+
2195
+ async def _on_app_message(self, message: Any, sender: str):
2196
+ """Handle application message events."""
2197
+ if self._input:
2198
+ await self._input.push_app_message(message, sender)
2199
+ await self._call_event_handler("on_app_message", message, sender)
2200
+
2201
+ async def _on_call_state_updated(self, state: str):
2202
+ """Handle call state update events."""
2203
+ await self._call_event_handler("on_call_state_updated", state)
2204
+
2205
+ async def _on_client_connected(self, participant: Any):
2206
+ """Handle client connected events."""
2207
+ await self._call_event_handler("on_client_connected", participant)
2208
+
2209
+ async def _on_client_disconnected(self, participant: Any):
2210
+ """Handle client disconnected events."""
2211
+ await self._call_event_handler("on_client_disconnected", participant)
2212
+
2213
+ async def _handle_dialin_ready(self, sip_endpoint: str):
2214
+ """Handle dial-in ready events by updating SIP configuration."""
2215
+ if not self._params.dialin_settings:
2216
+ return
2217
+
2218
+ async with aiohttp.ClientSession() as session:
2219
+ headers = {
2220
+ "Authorization": f"Bearer {self._params.api_key}",
2221
+ "Content-Type": "application/json",
2222
+ }
2223
+ data = {
2224
+ "callId": self._params.dialin_settings.call_id,
2225
+ "callDomain": self._params.dialin_settings.call_domain,
2226
+ "sipUri": sip_endpoint,
2227
+ }
2228
+
2229
+ url = f"{self._params.api_url}/dialin/pinlessCallUpdate"
2230
+
2231
+ try:
2232
+ async with session.post(
2233
+ url, headers=headers, json=data, timeout=aiohttp.ClientTimeout(total=10)
2234
+ ) as r:
2235
+ if r.status != 200:
2236
+ text = await r.text()
2237
+ logger.error(
2238
+ f"Unable to handle dialin-ready event (status: {r.status}, error: {text})"
2239
+ )
2240
+ return
2241
+
2242
+ logger.debug("Event dialin-ready was handled successfully")
2243
+ except asyncio.TimeoutError:
2244
+ logger.error(f"Timeout handling dialin-ready event ({url})")
2245
+ except Exception as e:
2246
+ logger.exception(f"Error handling dialin-ready event ({url}): {e}")
2247
+
2248
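For the pinless update above to succeed, the transport needs the Daily REST credentials and the dial-in identifiers from the webhook that launched the bot. A hedged sketch (`DailyDialinSettings` and the `api_url`/`api_key` parameter names follow the attributes read in `_handle_dialin_ready()`; the import path may differ between versions):

```python
import os

from pipecat.transports.services.daily import DailyDialinSettings, DailyParams


def make_dialin_params(call_id: str, call_domain: str) -> DailyParams:
    # call_id and call_domain come from Daily's dial-in webhook payload.
    return DailyParams(
        api_url="https://api.daily.co/v1",
        api_key=os.getenv("DAILY_API_KEY", ""),
        dialin_settings=DailyDialinSettings(call_id=call_id, call_domain=call_domain),
        audio_in_enabled=True,
        audio_out_enabled=True,
    )
```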
+ async def _on_dialin_connected(self, data):
2249
+ """Handle dial-in connected events."""
2250
+ await self._call_event_handler("on_dialin_connected", data)
2251
+
2252
+ async def _on_dialin_ready(self, sip_endpoint):
2253
+ """Handle dial-in ready events."""
2254
+ if self._params.dialin_settings:
2255
+ await self._handle_dialin_ready(sip_endpoint)
2256
+ await self._call_event_handler("on_dialin_ready", sip_endpoint)
2257
+
2258
+ async def _on_dialin_stopped(self, data):
2259
+ """Handle dial-in stopped events."""
2260
+ await self._call_event_handler("on_dialin_stopped", data)
2261
+
2262
+ async def _on_dialin_error(self, data):
2263
+ """Handle dial-in error events."""
2264
+ await self._call_event_handler("on_dialin_error", data)
2265
+
2266
+ async def _on_dialin_warning(self, data):
2267
+ """Handle dial-in warning events."""
2268
+ await self._call_event_handler("on_dialin_warning", data)
2269
+
2270
+ async def _on_dialout_answered(self, data):
2271
+ """Handle dial-out answered events."""
2272
+ await self._call_event_handler("on_dialout_answered", data)
2273
+
2274
+ async def _on_dialout_connected(self, data):
2275
+ """Handle dial-out connected events."""
2276
+ await self._call_event_handler("on_dialout_connected", data)
2277
+
2278
+ async def _on_dialout_stopped(self, data):
2279
+ """Handle dial-out stopped events."""
2280
+ await self._call_event_handler("on_dialout_stopped", data)
2281
+
2282
+ async def _on_dialout_error(self, data):
2283
+ """Handle dial-out error events."""
2284
+ await self._call_event_handler("on_dialout_error", data)
2285
+
2286
+ async def _on_dialout_warning(self, data):
2287
+ """Handle dial-out warning events."""
2288
+ await self._call_event_handler("on_dialout_warning", data)
2289
+
2290
+ async def _on_participant_joined(self, participant):
2291
+ """Handle participant joined events."""
2292
+ id = participant["id"]
2293
+ logger.info(f"Participant joined {id}")
2294
+
2295
+ if self._input and self._params.audio_in_enabled and self._params.audio_in_user_tracks:
2296
+ await self._input.capture_participant_audio(
2297
+ id, "microphone", self._client.in_sample_rate
2298
+ )
2299
+
2300
+ if not self._other_participant_has_joined:
2301
+ self._other_participant_has_joined = True
2302
+ await self._call_event_handler("on_first_participant_joined", participant)
2303
+
2304
+ await self._call_event_handler("on_participant_joined", participant)
2305
+ # Also call on_client_connected for compatibility with other transports
2306
+ await self._call_event_handler("on_client_connected", participant)
2307
+
2308
+ async def _on_participant_left(self, participant, reason):
2309
+ """Handle participant left events."""
2310
+ id = participant["id"]
2311
+ logger.info(f"Participant left {id}")
2312
+ await self._call_event_handler("on_participant_left", participant, reason)
2313
+ # Also call on_client_disconnected for compatibility with other transports
2314
+ await self._call_event_handler("on_client_disconnected", participant)
2315
+
2316
+ async def _on_participant_updated(self, participant):
2317
+ """Handle participant updated events."""
2318
+ await self._call_event_handler("on_participant_updated", participant)
2319
+
2320
+ async def _on_transcription_message(self, message):
2321
+ """Handle transcription message events."""
2322
+ await self._call_event_handler("on_transcription_message", message)
2323
+
2324
+ participant_id = ""
2325
+ if "participantId" in message:
2326
+ participant_id = message["participantId"]
2327
+ if not participant_id:
2328
+ return
2329
+
2330
+ text = message["text"]
2331
+ timestamp = message["timestamp"]
2332
+ is_final = message["rawResponse"]["is_final"]
2333
+ try:
2334
+ language = message["rawResponse"]["channel"]["alternatives"][0]["languages"][0]
2335
+ language = Language(language)
2336
+ except (KeyError, IndexError):
2337
+ language = None
2338
+ if is_final:
2339
+ frame = TranscriptionFrame(text, participant_id, timestamp, language, result=message)
2340
+ logger.debug(f"Transcription (from: {participant_id}): [{text}]")
2341
+ else:
2342
+ frame = InterimTranscriptionFrame(
2343
+ text,
2344
+ participant_id,
2345
+ timestamp,
2346
+ language,
2347
+ result=message,
2348
+ )
2349
+
2350
+ if self._input:
2351
+ await self._input.push_transcription_frame(frame)
2352
+
2353
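For reference, an illustrative Daily transcription message restricted to the fields this handler actually reads (real payloads carry additional data):

```python
message = {
    "participantId": "a1b2c3d4",
    "text": "hello there",
    "timestamp": "2024-01-01T00:00:00.000Z",
    "rawResponse": {
        "is_final": True,
        "channel": {"alternatives": [{"languages": ["en"]}]},
    },
}
```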
+ async def _on_transcription_stopped(self, stopped_by, stopped_by_error):
2354
+ """Handle transcription stopped events."""
2355
+ await self._call_event_handler("on_transcription_stopped", stopped_by, stopped_by_error)
2356
+
2357
+ async def _on_transcription_error(self, message):
2358
+ """Handle transcription error events."""
2359
+ await self._call_event_handler("on_transcription_error", message)
2360
+
2361
+ async def _on_recording_started(self, status):
2362
+ """Handle recording started events."""
2363
+ await self._call_event_handler("on_recording_started", status)
2364
+
2365
+ async def _on_recording_stopped(self, stream_id):
2366
+ """Handle recording stopped events."""
2367
+ await self._call_event_handler("on_recording_stopped", stream_id)
2368
+
2369
+ async def _on_recording_error(self, stream_id, message):
2370
+ """Handle recording error events."""
2371
+ await self._call_event_handler("on_recording_error", stream_id, message)