dv-pipecat-ai 0.0.85.dev7__py3-none-any.whl → 0.0.85.dev699__py3-none-any.whl

This diff represents the content of publicly available package versions released to a supported registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in the public registry.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic.

Files changed (158)
  1. {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/METADATA +78 -117
  2. {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/RECORD +158 -122
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +5 -0
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  11. pipecat/audio/filters/noisereduce_filter.py +15 -0
  12. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  13. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  14. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  15. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  16. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  17. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  18. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  19. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  20. pipecat/audio/vad/data/README.md +10 -0
  21. pipecat/audio/vad/vad_analyzer.py +13 -1
  22. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  23. pipecat/frames/frames.py +120 -87
  24. pipecat/observers/loggers/debug_log_observer.py +3 -3
  25. pipecat/observers/loggers/llm_log_observer.py +7 -3
  26. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  27. pipecat/pipeline/runner.py +12 -4
  28. pipecat/pipeline/service_switcher.py +64 -36
  29. pipecat/pipeline/task.py +85 -24
  30. pipecat/processors/aggregators/dtmf_aggregator.py +28 -22
  31. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  32. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  33. pipecat/processors/aggregators/llm_response.py +6 -7
  34. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  35. pipecat/processors/aggregators/user_response.py +6 -6
  36. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  37. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  38. pipecat/processors/filters/stt_mute_filter.py +2 -0
  39. pipecat/processors/frame_processor.py +103 -17
  40. pipecat/processors/frameworks/langchain.py +8 -2
  41. pipecat/processors/frameworks/rtvi.py +209 -68
  42. pipecat/processors/frameworks/strands_agents.py +170 -0
  43. pipecat/processors/logger.py +2 -2
  44. pipecat/processors/transcript_processor.py +4 -4
  45. pipecat/processors/user_idle_processor.py +3 -6
  46. pipecat/runner/run.py +270 -50
  47. pipecat/runner/types.py +2 -0
  48. pipecat/runner/utils.py +51 -10
  49. pipecat/serializers/exotel.py +5 -5
  50. pipecat/serializers/livekit.py +20 -0
  51. pipecat/serializers/plivo.py +6 -9
  52. pipecat/serializers/protobuf.py +6 -5
  53. pipecat/serializers/telnyx.py +2 -2
  54. pipecat/serializers/twilio.py +43 -23
  55. pipecat/services/ai_service.py +2 -6
  56. pipecat/services/anthropic/llm.py +2 -25
  57. pipecat/services/asyncai/tts.py +2 -3
  58. pipecat/services/aws/__init__.py +1 -0
  59. pipecat/services/aws/llm.py +122 -97
  60. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  61. pipecat/services/aws/nova_sonic/context.py +367 -0
  62. pipecat/services/aws/nova_sonic/frames.py +25 -0
  63. pipecat/services/aws/nova_sonic/llm.py +1155 -0
  64. pipecat/services/aws/stt.py +1 -3
  65. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  66. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  67. pipecat/services/aws_nova_sonic/context.py +13 -355
  68. pipecat/services/aws_nova_sonic/frames.py +13 -17
  69. pipecat/services/azure/realtime/__init__.py +0 -0
  70. pipecat/services/azure/realtime/llm.py +65 -0
  71. pipecat/services/azure/stt.py +15 -0
  72. pipecat/services/cartesia/tts.py +2 -2
  73. pipecat/services/deepgram/__init__.py +1 -0
  74. pipecat/services/deepgram/flux/__init__.py +0 -0
  75. pipecat/services/deepgram/flux/stt.py +636 -0
  76. pipecat/services/elevenlabs/__init__.py +2 -1
  77. pipecat/services/elevenlabs/stt.py +254 -276
  78. pipecat/services/elevenlabs/tts.py +5 -5
  79. pipecat/services/fish/tts.py +2 -2
  80. pipecat/services/gemini_multimodal_live/events.py +38 -524
  81. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  82. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  83. pipecat/services/gladia/stt.py +56 -72
  84. pipecat/services/google/__init__.py +1 -0
  85. pipecat/services/google/gemini_live/__init__.py +3 -0
  86. pipecat/services/google/gemini_live/file_api.py +189 -0
  87. pipecat/services/google/gemini_live/llm.py +1582 -0
  88. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  89. pipecat/services/google/llm.py +15 -11
  90. pipecat/services/google/llm_openai.py +3 -3
  91. pipecat/services/google/llm_vertex.py +86 -16
  92. pipecat/services/google/tts.py +7 -3
  93. pipecat/services/heygen/api.py +2 -0
  94. pipecat/services/heygen/client.py +8 -4
  95. pipecat/services/heygen/video.py +2 -0
  96. pipecat/services/hume/__init__.py +5 -0
  97. pipecat/services/hume/tts.py +220 -0
  98. pipecat/services/inworld/tts.py +6 -6
  99. pipecat/services/llm_service.py +15 -5
  100. pipecat/services/lmnt/tts.py +2 -2
  101. pipecat/services/mcp_service.py +4 -2
  102. pipecat/services/mem0/memory.py +6 -5
  103. pipecat/services/mistral/llm.py +29 -8
  104. pipecat/services/moondream/vision.py +42 -16
  105. pipecat/services/neuphonic/tts.py +2 -2
  106. pipecat/services/openai/__init__.py +1 -0
  107. pipecat/services/openai/base_llm.py +27 -20
  108. pipecat/services/openai/realtime/__init__.py +0 -0
  109. pipecat/services/openai/realtime/context.py +272 -0
  110. pipecat/services/openai/realtime/events.py +1106 -0
  111. pipecat/services/openai/realtime/frames.py +37 -0
  112. pipecat/services/openai/realtime/llm.py +829 -0
  113. pipecat/services/openai/tts.py +16 -8
  114. pipecat/services/openai_realtime/__init__.py +27 -0
  115. pipecat/services/openai_realtime/azure.py +21 -0
  116. pipecat/services/openai_realtime/context.py +21 -0
  117. pipecat/services/openai_realtime/events.py +21 -0
  118. pipecat/services/openai_realtime/frames.py +21 -0
  119. pipecat/services/openai_realtime_beta/azure.py +16 -0
  120. pipecat/services/openai_realtime_beta/openai.py +17 -5
  121. pipecat/services/playht/tts.py +31 -4
  122. pipecat/services/rime/tts.py +3 -4
  123. pipecat/services/salesforce/__init__.py +9 -0
  124. pipecat/services/salesforce/llm.py +465 -0
  125. pipecat/services/sarvam/tts.py +2 -6
  126. pipecat/services/simli/video.py +2 -2
  127. pipecat/services/speechmatics/stt.py +1 -7
  128. pipecat/services/stt_service.py +34 -0
  129. pipecat/services/tavus/video.py +2 -2
  130. pipecat/services/tts_service.py +9 -9
  131. pipecat/services/vision_service.py +7 -6
  132. pipecat/tests/utils.py +4 -4
  133. pipecat/transcriptions/language.py +41 -1
  134. pipecat/transports/base_input.py +17 -42
  135. pipecat/transports/base_output.py +42 -26
  136. pipecat/transports/daily/transport.py +199 -26
  137. pipecat/transports/heygen/__init__.py +0 -0
  138. pipecat/transports/heygen/transport.py +381 -0
  139. pipecat/transports/livekit/transport.py +228 -63
  140. pipecat/transports/local/audio.py +6 -1
  141. pipecat/transports/local/tk.py +11 -2
  142. pipecat/transports/network/fastapi_websocket.py +1 -1
  143. pipecat/transports/smallwebrtc/connection.py +98 -19
  144. pipecat/transports/smallwebrtc/request_handler.py +204 -0
  145. pipecat/transports/smallwebrtc/transport.py +65 -23
  146. pipecat/transports/tavus/transport.py +23 -12
  147. pipecat/transports/websocket/client.py +41 -5
  148. pipecat/transports/websocket/fastapi.py +21 -11
  149. pipecat/transports/websocket/server.py +14 -7
  150. pipecat/transports/whatsapp/api.py +8 -0
  151. pipecat/transports/whatsapp/client.py +47 -0
  152. pipecat/utils/base_object.py +54 -22
  153. pipecat/utils/string.py +12 -1
  154. pipecat/utils/tracing/service_decorators.py +21 -21
  155. {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/WHEEL +0 -0
  156. {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/licenses/LICENSE +0 -0
  157. {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/top_level.txt +0 -0
  158. /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/services/tts_service.py CHANGED
@@ -20,10 +20,10 @@ from pipecat.frames.frames import (
  ErrorFrame,
  Frame,
  InterimTranscriptionFrame,
+ InterruptionFrame,
  LLMFullResponseEndFrame,
  LLMFullResponseStartFrame,
  StartFrame,
- StartInterruptionFrame,
  TextFrame,
  TranscriptionFrame,
  TTSAudioRawFrame,
@@ -319,7 +319,7 @@ class TTSService(AIService):
  and not isinstance(frame, TranscriptionFrame)
  ):
  await self._process_text_frame(frame)
- elif isinstance(frame, StartInterruptionFrame):
+ elif isinstance(frame, InterruptionFrame):
  await self._handle_interruption(frame, direction)
  await self.push_frame(frame, direction)
  elif isinstance(frame, (LLMFullResponseEndFrame, EndFrame)):
@@ -377,14 +377,14 @@ class TTSService(AIService):
  await super().push_frame(frame, direction)

  if self._push_stop_frames and (
- isinstance(frame, StartInterruptionFrame)
+ isinstance(frame, InterruptionFrame)
  or isinstance(frame, TTSStartedFrame)
  or isinstance(frame, TTSAudioRawFrame)
  or isinstance(frame, TTSStoppedFrame)
  ):
  await self._stop_frame_queue.put(frame)

- async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+ async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
  self._processing_text = False
  await self._text_aggregator.handle_interruption()
  for filter in self._text_filters:
@@ -465,7 +465,7 @@ class TTSService(AIService):
  )
  if isinstance(frame, TTSStartedFrame):
  has_started = True
- elif isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
+ elif isinstance(frame, (TTSStoppedFrame, InterruptionFrame)):
  has_started = False
  except asyncio.TimeoutError:
  if has_started:
@@ -550,7 +550,7 @@ class WordTTSService(TTSService):
  elif isinstance(frame, (LLMFullResponseEndFrame, EndFrame)):
  await self.flush_audio()

- async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+ async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
  await super()._handle_interruption(frame, direction)
  self._llm_response_started = False
  self.reset_word_timestamps()
@@ -640,7 +640,7 @@ class InterruptibleTTSService(WebsocketTTSService):
  # user interrupts we need to reconnect.
  self._bot_speaking = False

- async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+ async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
  await super()._handle_interruption(frame, direction)
  if self._bot_speaking:
  await self._disconnect()
@@ -712,7 +712,7 @@ class InterruptibleWordTTSService(WebsocketWordTTSService):
  # user interrupts we need to reconnect.
  self._bot_speaking = False

- async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+ async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
  await super()._handle_interruption(frame, direction)
  if self._bot_speaking:
  await self._disconnect()
@@ -840,7 +840,7 @@ class AudioContextWordTTSService(WebsocketWordTTSService):
  await super().cancel(frame)
  await self._stop_audio_context_task()

- async def _handle_interruption(self, frame: StartInterruptionFrame, direction: FrameDirection):
+ async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
  await super()._handle_interruption(frame, direction)
  await self._stop_audio_context_task()
  self._create_audio_context_task()
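Taken together, the hunks above rename StartInterruptionFrame to InterruptionFrame throughout the TTS service hierarchy. A minimal migration sketch for downstream code that overrides _handle_interruption, assuming only the renamed frame type from this diff (the subclass and its cleanup attribute are hypothetical):

```python
from pipecat.frames.frames import InterruptionFrame  # was: StartInterruptionFrame
from pipecat.processors.frame_processor import FrameDirection
from pipecat.services.tts_service import TTSService


class MyTTSService(TTSService):
    async def _handle_interruption(self, frame: InterruptionFrame, direction: FrameDirection):
        # The base class resets its text aggregation and filters first.
        await super()._handle_interruption(frame, direction)
        # Hypothetical subclass-specific cleanup.
        self._pending_audio = b""
```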
pipecat/services/vision_service.py CHANGED
@@ -14,7 +14,8 @@ visual content.
  from abc import abstractmethod
  from typing import AsyncGenerator

- from pipecat.frames.frames import Frame, VisionImageRawFrame
+ from pipecat.frames.frames import Frame, LLMContextFrame
+ from pipecat.processors.aggregators.llm_context import LLMContext
  from pipecat.processors.frame_processor import FrameDirection
  from pipecat.services.ai_service import AIService

@@ -37,15 +38,15 @@ class VisionService(AIService):
  self._describe_text = None

  @abstractmethod
- async def run_vision(self, frame: VisionImageRawFrame) -> AsyncGenerator[Frame, None]:
- """Process a vision image frame and generate results.
+ async def run_vision(self, context: LLMContext) -> AsyncGenerator[Frame, None]:
+ """Process the latest image in the context and generate results.

  This method must be implemented by subclasses to provide actual computer
  vision functionality such as image description, object detection, or
  visual question answering.

  Args:
- frame: The vision image frame to process, containing image data.
+ context: The context to process, containing image data.

  Yields:
  Frame: Frames containing the vision analysis results, typically TextFrame
@@ -65,9 +66,9 @@ class VisionService(AIService):
  """
  await super().process_frame(frame, direction)

- if isinstance(frame, VisionImageRawFrame):
+ if isinstance(frame, LLMContextFrame):
  await self.start_processing_metrics()
- await self.process_generator(self.run_vision(frame))
+ await self.process_generator(self.run_vision(frame.context))
  await self.stop_processing_metrics()
  else:
  await self.push_frame(frame, direction)
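run_vision now receives the aggregated LLMContext instead of a single VisionImageRawFrame, and the service triggers on LLMContextFrame. A hedged sketch of a subclass under the new signature; the image-description helper is a hypothetical stand-in, not a real pipecat API:

```python
from typing import AsyncGenerator

from pipecat.frames.frames import Frame, TextFrame
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.services.vision_service import VisionService


class MyVisionService(VisionService):
    async def run_vision(self, context: LLMContext) -> AsyncGenerator[Frame, None]:
        # Hypothetical helper: pull the most recent image out of the context
        # and run it through whatever vision model this service wraps.
        description = await self._describe_latest_image(context)
        yield TextFrame(text=description)
```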
pipecat/tests/utils.py CHANGED
@@ -128,7 +128,7 @@ async def run_test(
  expected_up_frames: Optional[Sequence[type]] = None,
  ignore_start: bool = True,
  observers: Optional[List[BaseObserver]] = None,
- start_metadata: Optional[Dict[str, Any]] = None,
+ pipeline_params: Optional[PipelineParams] = None,
  send_end_frame: bool = True,
  ) -> Tuple[Sequence[Frame], Sequence[Frame]]:
  """Run a test pipeline with the specified processor and validate frame flow.
@@ -144,7 +144,7 @@ async def run_test(
  expected_up_frames: Expected frame types flowing upstream (optional).
  ignore_start: Whether to ignore StartFrames in frame validation.
  observers: Optional list of observers to attach to the pipeline.
- start_metadata: Optional metadata to include with the StartFrame.
+ pipeline_params: Optional pipeline parameters.
  send_end_frame: Whether to send an EndFrame at the end of the test.

  Returns:
@@ -154,7 +154,7 @@ async def run_test(
  AssertionError: If the received frames don't match the expected frame types.
  """
  observers = observers or []
- start_metadata = start_metadata or {}
+ pipeline_params = pipeline_params or PipelineParams()

  received_up = asyncio.Queue()
  received_down = asyncio.Queue()
@@ -173,7 +173,7 @@

  task = PipelineTask(
  pipeline,
- params=PipelineParams(start_metadata=start_metadata),
+ params=pipeline_params,
  observers=observers,
  cancel_on_idle_timeout=False,
  )
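For test authors, the start_metadata keyword is gone; the same metadata now travels inside a PipelineParams object. A hedged before/after sketch (frames_to_send and expected_down_frames are assumed parameter names that this diff only shows in part):

```python
from pipecat.pipeline.task import PipelineParams
from pipecat.tests.utils import run_test


async def test_my_processor(processor, frames, expected):
    # Before: await run_test(processor, ..., start_metadata={"session": "abc"})
    # After: wrap the metadata in PipelineParams yourself.
    return await run_test(
        processor,
        frames_to_send=frames,
        expected_down_frames=expected,
        pipeline_params=PipelineParams(start_metadata={"session": "abc"}),
    )
```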
pipecat/transcriptions/language.py CHANGED
@@ -68,6 +68,9 @@ class Language(StrEnum):
  AS = "as"
  AS_IN = "as-IN"

+ # Asturian
+ AST = "ast"
+
  # Azerbaijani
  AZ = "az"
  AZ_AZ = "az-AZ"
@@ -101,6 +104,9 @@ class Language(StrEnum):
  CA = "ca"
  CA_ES = "ca-ES"

+ # Cebuano
+ CEB = "ceb"
+
  # Mandarin Chinese
  CMN = "cmn"
  CMN_CN = "cmn-CN"
@@ -185,6 +191,9 @@ class Language(StrEnum):
  FA = "fa"
  FA_IR = "fa-IR"

+ # Fulah
+ FF = "ff"
+
  # Finnish
  FI = "fi"
  FI_FI = "fi-FI"
@@ -251,6 +260,9 @@ class Language(StrEnum):
  ID = "id"
  ID_ID = "id-ID"

+ # Igbo
+ IG = "ig"
+
  # Icelandic
  IS = "is"
  IS_IS = "is-IS"
@@ -279,6 +291,9 @@ class Language(StrEnum):
  KA = "ka"
  KA_GE = "ka-GE"

+ # Kabuverdianu
+ KEA = "kea"
+
  # Kazakh
  KK = "kk"
  KK_KZ = "kk-KZ"
@@ -295,6 +310,13 @@ class Language(StrEnum):
  KO = "ko"
  KO_KR = "ko-KR"

+ # Kurdish
+ KU = "ku"
+
+ # Kyrgyz
+ KY = "ky"
+ KY_KG = "ky-KG"
+
  # Latin
  LA = "la"

@@ -312,6 +334,12 @@ class Language(StrEnum):
  LT = "lt"
  LT_LT = "lt-LT"

+ # Ganda
+ LG = "lg"
+
+ # Luo
+ LUO = "luo"
+
  # Latvian
  LV = "lv"
  LV_LV = "lv-LV"
@@ -366,6 +394,12 @@ class Language(StrEnum):
  NL_BE = "nl-BE"
  NL_NL = "nl-NL"

+ # Northern Sotho
+ NSO = "nso"
+
+ # Chichewa
+ NY = "ny"
+
  # Occitan
  OC = "oc"

@@ -484,6 +518,9 @@ class Language(StrEnum):
  UK = "uk"
  UK_UA = "uk-UA"

+ # Umbundu
+ UMB = "umb"
+
  # Urdu
  UR = "ur"
  UR_IN = "ur-IN"
@@ -497,6 +534,9 @@ class Language(StrEnum):
  VI = "vi"
  VI_VN = "vi-VN"

+ # Wolof
+ WO = "wo"
+
  # Wu Chinese
  WUU = "wuu"
  WUU_CN = "wuu-CN"
@@ -507,7 +547,7 @@ class Language(StrEnum):
  # Yoruba
  YO = "yo"

- # Yue Chinese
+ # Yue Chinese (Cantonese)
  YUE = "yue"
  YUE_CN = "yue-CN"

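Because Language is a StrEnum, the newly added codes compare equal to their plain string values, so existing string-based lookups keep working:

```python
from pipecat.transcriptions.language import Language

assert Language.AST == "ast"  # new: Asturian
assert Language.KY_KG == "ky-KG"  # new: Kyrgyz (Kyrgyzstan)
assert Language("yue") is Language.YUE  # comment is now "Yue Chinese (Cantonese)"
```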
pipecat/transports/base_input.py CHANGED
@@ -11,7 +11,6 @@ input processing, including VAD, turn analysis, and interruption management.
  """

  import asyncio
- from concurrent.futures import ThreadPoolExecutor
  from typing import Optional

  from loguru import logger
@@ -22,7 +21,6 @@ from pipecat.audio.turn.base_turn_analyzer import (
  )
  from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADState
  from pipecat.frames.frames import (
- BotInterruptionFrame,
  BotStartedSpeakingFrame,
  BotStoppedSpeakingFrame,
  CancelFrame,
@@ -36,7 +34,6 @@ from pipecat.frames.frames import (
  MetricsFrame,
  SpeechControlParamsFrame,
  StartFrame,
- StartInterruptionFrame,
  StopFrame,
  SystemFrame,
  UserSpeakingFrame,
@@ -81,10 +78,6 @@ class BaseInputTransport(FrameProcessor):
  # Track user speaking state for interruption logic
  self._user_speaking = False

- # We read audio from a single queue one at a time and we then run VAD in
- # a thread. Therefore, only one thread should be necessary.
- self._executor = ThreadPoolExecutor(max_workers=1)
-
  # Task to process incoming audio (VAD) and push audio frames downstream
  # if passthrough is enabled.
  self._audio_task = None
@@ -289,8 +282,6 @@
  elif isinstance(frame, CancelFrame):
  await self.cancel(frame)
  await self.push_frame(frame, direction)
- elif isinstance(frame, BotInterruptionFrame):
- await self._handle_bot_interruption(frame)
  elif isinstance(frame, BotStartedSpeakingFrame):
  await self._handle_bot_started_speaking(frame)
  await self.push_frame(frame, direction)
@@ -298,22 +289,12 @@
  await self._handle_bot_stopped_speaking(frame)
  await self.push_frame(frame, direction)
  elif isinstance(frame, EmulateUserStartedSpeakingFrame):
- logger.debug("Emulating user started speaking")
+ self.logger.debug("Emulating user started speaking")
  await self._handle_user_interruption(VADState.SPEAKING, emulated=True)
  elif isinstance(frame, EmulateUserStoppedSpeakingFrame):
- logger.debug("Emulating user stopped speaking")
+ self.logger.debug("Emulating user stopped speaking")
  await self._handle_user_interruption(VADState.QUIET, emulated=True)
  # All other system frames
- elif isinstance(frame, VADParamsUpdateFrame):
- if self.vad_analyzer:
- self.vad_analyzer.set_params(frame.params, bot_logger=self.logger)
- speech_frame = SpeechControlParamsFrame(
- vad_params=frame.params,
- turn_params=self._params.turn_analyzer.params
- if self._params.turn_analyzer
- else None,
- )
- await self.push_frame(speech_frame)
  elif isinstance(frame, SystemFrame):
  await self.push_frame(frame, direction)
  # Control frames
@@ -325,6 +306,16 @@
  elif isinstance(frame, StopFrame):
  await self.push_frame(frame, direction)
  await self.pause(frame)
+ elif isinstance(frame, VADParamsUpdateFrame):
+ if self.vad_analyzer:
+ self.vad_analyzer.set_params(frame.params)
+ speech_frame = SpeechControlParamsFrame(
+ vad_params=frame.params,
+ turn_params=self._params.turn_analyzer.params
+ if self._params.turn_analyzer
+ else None,
+ )
+ await self.push_frame(speech_frame)
  elif isinstance(frame, FilterUpdateSettingsFrame) and self._params.audio_in_filter:
  await self._params.audio_in_filter.process_frame(frame)
  # Other frames
@@ -335,13 +326,6 @@
  # Handle interruptions
  #

- async def _handle_bot_interruption(self, frame: BotInterruptionFrame):
- """Handle bot interruption frames."""
- self.logger.debug("Bot interruption")
- if self.interruptions_allowed:
- await self._start_interruption()
- await self.push_frame(StartInterruptionFrame())
-
  async def _handle_user_interruption(self, vad_state: VADState, emulated: bool = False):
  """Handle user interruption events based on speaking state."""
  if vad_state == VADState.SPEAKING:
@@ -353,7 +337,7 @@
  await self.push_frame(downstream_frame)
  await self.push_frame(upstream_frame, FrameDirection.UPSTREAM)

- # Only push StartInterruptionFrame if:
+ # Only push InterruptionFrame if:
  # 1. No interruption config is set, OR
  # 2. Interruption config is set but bot is not speaking
  should_push_immediate_interruption = (
@@ -362,13 +346,9 @@

  # Make sure we notify about interruptions quickly out-of-band.
  if should_push_immediate_interruption and self.interruptions_allowed:
- await self._start_interruption()
- # Push an out-of-band frame (i.e. not using the ordered push
- # frame task) to stop everything, specially at the output
- # transport.
- await self.push_frame(StartInterruptionFrame())
+ await self.push_interruption_task_frame_and_wait()
  elif self.interruption_strategies and self._bot_speaking:
- logger.debug(
+ self.logger.debug(
  "User started speaking while bot is speaking with interruption config - "
  "deferring interruption to aggregator"
  )
@@ -381,9 +361,6 @@
  await self.push_frame(downstream_frame)
  await self.push_frame(upstream_frame, FrameDirection.UPSTREAM)

- if self.interruptions_allowed:
- await self._stop_interruption()
-
  #
  # Handle bot speaking state
  #
@@ -416,9 +393,7 @@
  """Analyze audio frame for voice activity."""
  state = VADState.QUIET
  if self.vad_analyzer:
- state = await self.get_event_loop().run_in_executor(
- self._executor, self.vad_analyzer.analyze_audio, audio_frame.audio
- )
+ state = await self.vad_analyzer.analyze_audio(audio_frame.audio)
  return state

  async def _handle_vad(self, audio_frame: InputAudioRawFrame, vad_state: VADState) -> VADState:
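The ThreadPoolExecutor indirection is gone and analyze_audio is awaited directly, which implies the analyzer API is now a coroutine (consistent with the vad_analyzer.py change in the file list above). A hedged sketch of an analyzer that wraps blocking inference under that assumption; it duck-types the call rather than claiming the real base-class contract:

```python
import asyncio

from pipecat.audio.vad.vad_analyzer import VADState


class BlockingModelVAD:
    def _run_model(self, audio: bytes) -> VADState:
        # Hypothetical stand-in for CPU-bound inference (e.g. an ONNX model).
        return VADState.SPEAKING if audio and max(audio) > 127 else VADState.QUIET

    async def analyze_audio(self, audio: bytes) -> VADState:
        # Same shape as the awaited call in the hunk above; blocking work is
        # offloaded here, since the transport's executor no longer exists.
        return await asyncio.to_thread(self._run_model, audio)
```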
@@ -511,7 +486,7 @@
  self._audio_in_queue.task_done()
  except asyncio.TimeoutError:
  if self._user_speaking:
- logger.warning(
+ self.logger.warning(
  "Forcing user stopped speaking due to timeout receiving audio frame!"
  )
  vad_state = VADState.QUIET
pipecat/transports/base_output.py CHANGED
@@ -29,20 +29,19 @@ from pipecat.frames.frames import (
  CancelFrame,
  EndFrame,
  Frame,
- InputTransportMessageUrgentFrame,
+ InterruptionFrame,
  MixerControlFrame,
  OutputAudioRawFrame,
  OutputDTMFFrame,
  OutputDTMFUrgentFrame,
  OutputImageRawFrame,
+ OutputTransportMessageFrame,
+ OutputTransportMessageUrgentFrame,
  OutputTransportReadyFrame,
  SpeechOutputAudioRawFrame,
  SpriteFrame,
  StartFrame,
- StartInterruptionFrame,
  SystemFrame,
- TransportMessageFrame,
- TransportMessageUrgentFrame,
  TTSAudioRawFrame,
  )
  from pipecat.processors.frame_processor import FrameDirection, FrameProcessor
@@ -179,7 +178,9 @@
  # Sending a frame indicating that the output transport is ready and able to receive frames.
  await self.push_frame(OutputTransportReadyFrame(), FrameDirection.UPSTREAM)

- async def send_message(self, frame: TransportMessageFrame | TransportMessageUrgentFrame):
+ async def send_message(
+ self, frame: OutputTransportMessageFrame | OutputTransportMessageUrgentFrame
+ ):
  """Send a transport message.

  Args:
@@ -203,21 +204,27 @@
  """
  pass

- async def write_video_frame(self, frame: OutputImageRawFrame):
+ async def write_video_frame(self, frame: OutputImageRawFrame) -> bool:
  """Write a video frame to the transport.

  Args:
  frame: The output video frame to write.
+
+ Returns:
+ True if the video frame was written successfully, False otherwise.
  """
- pass
+ return False

- async def write_audio_frame(self, frame: OutputAudioRawFrame):
+ async def write_audio_frame(self, frame: OutputAudioRawFrame) -> bool:
  """Write an audio frame to the transport.

  Args:
  frame: The output audio frame to write.
+
+ Returns:
+ True if the audio frame was written successfully, False otherwise.
  """
- pass
+ return False

  async def write_dtmf(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame):
  """Write a DTMF tone using the transport's preferred method.
@@ -288,9 +295,8 @@
  await super().process_frame(frame, direction)

  #
- # System frames (like StartInterruptionFrame) are pushed
- # immediately. Other frames require order so they are put in the sink
- # queue.
+ # System frames (like InterruptionFrame) are pushed immediately. Other
+ # frames require order so they are put in the sink queue.
  #
  if isinstance(frame, StartFrame):
  # Push StartFrame before start(), because we want StartFrame to be
@@ -300,12 +306,10 @@
  elif isinstance(frame, CancelFrame):
  await self.cancel(frame)
  await self.push_frame(frame, direction)
- elif isinstance(frame, StartInterruptionFrame):
+ elif isinstance(frame, InterruptionFrame):
  await self.push_frame(frame, direction)
  await self._handle_frame(frame)
- elif isinstance(frame, TransportMessageUrgentFrame) and not isinstance(
- frame, InputTransportMessageUrgentFrame
- ):
+ elif isinstance(frame, OutputTransportMessageUrgentFrame):
  await self.send_message(frame)
  elif isinstance(frame, OutputDTMFUrgentFrame):
  await self.write_dtmf(frame)
@@ -341,7 +345,7 @@

  sender = self._media_senders[frame.transport_destination]

- if isinstance(frame, StartInterruptionFrame):
+ if isinstance(frame, InterruptionFrame):
  await sender.handle_interruptions(frame)
  elif isinstance(frame, OutputAudioRawFrame):
  await sender.handle_audio_frame(frame)
@@ -492,7 +496,7 @@
  await self._cancel_clock_task()
  await self._cancel_video_task()

- async def handle_interruptions(self, _: StartInterruptionFrame):
+ async def handle_interruptions(self, _: InterruptionFrame):
  """Handle interruption events by restarting tasks and clearing buffers.

  Args:
@@ -642,7 +646,7 @@
  await self._set_video_image(frame)
  elif isinstance(frame, SpriteFrame):
  await self._set_video_images(frame.images)
- elif isinstance(frame, TransportMessageFrame):
+ elif isinstance(frame, OutputTransportMessageFrame):
  await self._transport.send_message(frame)
  elif isinstance(frame, OutputDTMFFrame):
  await self._transport.write_dtmf(frame)
@@ -661,6 +665,7 @@
  self._audio_queue.get(), timeout=vad_stop_secs
  )
  yield frame
+ self._audio_queue.task_done()
  except asyncio.TimeoutError:
  # Notify the bot stopped speaking upstream if necessary.
  await self._bot_stopped_speaking()
@@ -673,8 +678,9 @@
  frame = self._audio_queue.get_nowait()
  if isinstance(frame, OutputAudioRawFrame):
  frame.audio = await self._mixer.mix(frame.audio)
- last_frame_time = time.time()
+ last_frame_time = time.time()
  yield frame
+ self._audio_queue.task_done()
  except asyncio.QueueEmpty:
  # Notify the bot stopped speaking upstream if necessary.
  diff_time = time.time() - last_frame_time
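The two task_done() additions pair every successful get() with a completion mark, which is what allows anything awaiting the queue's join() to unblock once all dequeued audio has been handled. The standard asyncio pattern, for reference (queue contents are illustrative):

```python
import asyncio


async def main():
    queue: asyncio.Queue = asyncio.Queue()
    for chunk in (b"a", b"b", b"c"):
        queue.put_nowait(chunk)

    async def consume():
        while True:
            item = await queue.get()
            # ... handle the item ...
            queue.task_done()  # must match every get(), or join() hangs

    consumer = asyncio.create_task(consume())
    await queue.join()  # returns once task_done() was called for every item
    consumer.cancel()


asyncio.run(main())
```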
@@ -740,12 +746,22 @@
  # Handle frame.
  await self._handle_frame(frame)

- # Also, push frame downstream in case anyone else needs it.
- await self._transport.push_frame(frame)
-
- # Send audio.
- if isinstance(frame, OutputAudioRawFrame):
- await self._transport.write_audio_frame(frame)
+ # If we are not able to write to the transport we shouldn't
+ # push downstream.
+ push_downstream = True
+
+ # Try to send audio to the transport.
+ try:
+ if isinstance(frame, OutputAudioRawFrame):
+ push_downstream = await self._transport.write_audio_frame(frame)
+ except Exception as e:
+ logger.error(f"{self} Error writing {frame} to transport: {e}")
+ push_downstream = False
+
+ # If we were able to send to the transport, push the frame
+ # downstream in case anyone else needs it.
+ if push_downstream:
+ await self._transport.push_frame(frame)

  #
  # Video handling