dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of dv-pipecat-ai has been flagged as potentially problematic.
Files changed (195)
  1. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
  2. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/dtmf/dtmf-0.wav +0 -0
  11. pipecat/audio/dtmf/dtmf-1.wav +0 -0
  12. pipecat/audio/dtmf/dtmf-2.wav +0 -0
  13. pipecat/audio/dtmf/dtmf-3.wav +0 -0
  14. pipecat/audio/dtmf/dtmf-4.wav +0 -0
  15. pipecat/audio/dtmf/dtmf-5.wav +0 -0
  16. pipecat/audio/dtmf/dtmf-6.wav +0 -0
  17. pipecat/audio/dtmf/dtmf-7.wav +0 -0
  18. pipecat/audio/dtmf/dtmf-8.wav +0 -0
  19. pipecat/audio/dtmf/dtmf-9.wav +0 -0
  20. pipecat/audio/dtmf/dtmf-pound.wav +0 -0
  21. pipecat/audio/dtmf/dtmf-star.wav +0 -0
  22. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  23. pipecat/audio/filters/noisereduce_filter.py +15 -0
  24. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  25. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  26. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  27. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  28. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  29. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  30. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  31. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  32. pipecat/audio/vad/data/README.md +10 -0
  33. pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
  34. pipecat/audio/vad/silero.py +9 -3
  35. pipecat/audio/vad/vad_analyzer.py +13 -1
  36. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  37. pipecat/frames/frames.py +277 -86
  38. pipecat/observers/loggers/debug_log_observer.py +3 -3
  39. pipecat/observers/loggers/llm_log_observer.py +7 -3
  40. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  41. pipecat/pipeline/runner.py +18 -6
  42. pipecat/pipeline/service_switcher.py +64 -36
  43. pipecat/pipeline/task.py +125 -79
  44. pipecat/pipeline/tts_switcher.py +30 -0
  45. pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
  46. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  47. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  48. pipecat/processors/aggregators/llm_context.py +40 -2
  49. pipecat/processors/aggregators/llm_response.py +32 -15
  50. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  51. pipecat/processors/aggregators/user_response.py +6 -6
  52. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  53. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  54. pipecat/processors/dtmf_aggregator.py +174 -77
  55. pipecat/processors/filters/stt_mute_filter.py +17 -0
  56. pipecat/processors/frame_processor.py +110 -24
  57. pipecat/processors/frameworks/langchain.py +8 -2
  58. pipecat/processors/frameworks/rtvi.py +210 -68
  59. pipecat/processors/frameworks/strands_agents.py +170 -0
  60. pipecat/processors/logger.py +2 -2
  61. pipecat/processors/transcript_processor.py +26 -5
  62. pipecat/processors/user_idle_processor.py +35 -11
  63. pipecat/runner/daily.py +59 -20
  64. pipecat/runner/run.py +395 -93
  65. pipecat/runner/types.py +6 -4
  66. pipecat/runner/utils.py +51 -10
  67. pipecat/serializers/__init__.py +5 -1
  68. pipecat/serializers/asterisk.py +16 -2
  69. pipecat/serializers/convox.py +41 -4
  70. pipecat/serializers/custom.py +257 -0
  71. pipecat/serializers/exotel.py +5 -5
  72. pipecat/serializers/livekit.py +20 -0
  73. pipecat/serializers/plivo.py +5 -5
  74. pipecat/serializers/protobuf.py +6 -5
  75. pipecat/serializers/telnyx.py +2 -2
  76. pipecat/serializers/twilio.py +43 -23
  77. pipecat/serializers/vi.py +324 -0
  78. pipecat/services/ai_service.py +2 -6
  79. pipecat/services/anthropic/llm.py +2 -25
  80. pipecat/services/assemblyai/models.py +6 -0
  81. pipecat/services/assemblyai/stt.py +13 -5
  82. pipecat/services/asyncai/tts.py +5 -3
  83. pipecat/services/aws/__init__.py +1 -0
  84. pipecat/services/aws/llm.py +147 -105
  85. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  86. pipecat/services/aws/nova_sonic/context.py +436 -0
  87. pipecat/services/aws/nova_sonic/frames.py +25 -0
  88. pipecat/services/aws/nova_sonic/llm.py +1265 -0
  89. pipecat/services/aws/stt.py +3 -3
  90. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  91. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  92. pipecat/services/aws_nova_sonic/context.py +8 -354
  93. pipecat/services/aws_nova_sonic/frames.py +13 -17
  94. pipecat/services/azure/llm.py +51 -1
  95. pipecat/services/azure/realtime/__init__.py +0 -0
  96. pipecat/services/azure/realtime/llm.py +65 -0
  97. pipecat/services/azure/stt.py +15 -0
  98. pipecat/services/cartesia/stt.py +77 -70
  99. pipecat/services/cartesia/tts.py +80 -13
  100. pipecat/services/deepgram/__init__.py +1 -0
  101. pipecat/services/deepgram/flux/__init__.py +0 -0
  102. pipecat/services/deepgram/flux/stt.py +640 -0
  103. pipecat/services/elevenlabs/__init__.py +4 -1
  104. pipecat/services/elevenlabs/stt.py +339 -0
  105. pipecat/services/elevenlabs/tts.py +87 -46
  106. pipecat/services/fish/tts.py +5 -2
  107. pipecat/services/gemini_multimodal_live/events.py +38 -524
  108. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  109. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  110. pipecat/services/gladia/stt.py +56 -72
  111. pipecat/services/google/__init__.py +1 -0
  112. pipecat/services/google/gemini_live/__init__.py +3 -0
  113. pipecat/services/google/gemini_live/file_api.py +189 -0
  114. pipecat/services/google/gemini_live/llm.py +1582 -0
  115. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  116. pipecat/services/google/llm.py +15 -11
  117. pipecat/services/google/llm_openai.py +3 -3
  118. pipecat/services/google/llm_vertex.py +86 -16
  119. pipecat/services/google/stt.py +4 -0
  120. pipecat/services/google/tts.py +7 -3
  121. pipecat/services/heygen/api.py +2 -0
  122. pipecat/services/heygen/client.py +8 -4
  123. pipecat/services/heygen/video.py +2 -0
  124. pipecat/services/hume/__init__.py +5 -0
  125. pipecat/services/hume/tts.py +220 -0
  126. pipecat/services/inworld/tts.py +6 -6
  127. pipecat/services/llm_service.py +15 -5
  128. pipecat/services/lmnt/tts.py +4 -2
  129. pipecat/services/mcp_service.py +4 -2
  130. pipecat/services/mem0/memory.py +6 -5
  131. pipecat/services/mistral/llm.py +29 -8
  132. pipecat/services/moondream/vision.py +42 -16
  133. pipecat/services/neuphonic/tts.py +5 -2
  134. pipecat/services/openai/__init__.py +1 -0
  135. pipecat/services/openai/base_llm.py +27 -20
  136. pipecat/services/openai/realtime/__init__.py +0 -0
  137. pipecat/services/openai/realtime/context.py +272 -0
  138. pipecat/services/openai/realtime/events.py +1106 -0
  139. pipecat/services/openai/realtime/frames.py +37 -0
  140. pipecat/services/openai/realtime/llm.py +829 -0
  141. pipecat/services/openai/tts.py +49 -10
  142. pipecat/services/openai_realtime/__init__.py +27 -0
  143. pipecat/services/openai_realtime/azure.py +21 -0
  144. pipecat/services/openai_realtime/context.py +21 -0
  145. pipecat/services/openai_realtime/events.py +21 -0
  146. pipecat/services/openai_realtime/frames.py +21 -0
  147. pipecat/services/openai_realtime_beta/azure.py +16 -0
  148. pipecat/services/openai_realtime_beta/openai.py +17 -5
  149. pipecat/services/piper/tts.py +7 -9
  150. pipecat/services/playht/tts.py +34 -4
  151. pipecat/services/rime/tts.py +12 -12
  152. pipecat/services/riva/stt.py +3 -1
  153. pipecat/services/salesforce/__init__.py +9 -0
  154. pipecat/services/salesforce/llm.py +700 -0
  155. pipecat/services/sarvam/__init__.py +7 -0
  156. pipecat/services/sarvam/stt.py +540 -0
  157. pipecat/services/sarvam/tts.py +97 -13
  158. pipecat/services/simli/video.py +2 -2
  159. pipecat/services/speechmatics/stt.py +22 -10
  160. pipecat/services/stt_service.py +47 -0
  161. pipecat/services/tavus/video.py +2 -2
  162. pipecat/services/tts_service.py +75 -22
  163. pipecat/services/vision_service.py +7 -6
  164. pipecat/services/vistaar/llm.py +51 -9
  165. pipecat/tests/utils.py +4 -4
  166. pipecat/transcriptions/language.py +41 -1
  167. pipecat/transports/base_input.py +13 -34
  168. pipecat/transports/base_output.py +140 -104
  169. pipecat/transports/daily/transport.py +199 -26
  170. pipecat/transports/heygen/__init__.py +0 -0
  171. pipecat/transports/heygen/transport.py +381 -0
  172. pipecat/transports/livekit/transport.py +228 -63
  173. pipecat/transports/local/audio.py +6 -1
  174. pipecat/transports/local/tk.py +11 -2
  175. pipecat/transports/network/fastapi_websocket.py +1 -1
  176. pipecat/transports/smallwebrtc/connection.py +103 -19
  177. pipecat/transports/smallwebrtc/request_handler.py +246 -0
  178. pipecat/transports/smallwebrtc/transport.py +65 -23
  179. pipecat/transports/tavus/transport.py +23 -12
  180. pipecat/transports/websocket/client.py +41 -5
  181. pipecat/transports/websocket/fastapi.py +21 -11
  182. pipecat/transports/websocket/server.py +14 -7
  183. pipecat/transports/whatsapp/api.py +8 -0
  184. pipecat/transports/whatsapp/client.py +47 -0
  185. pipecat/utils/base_object.py +54 -22
  186. pipecat/utils/redis.py +58 -0
  187. pipecat/utils/string.py +13 -1
  188. pipecat/utils/tracing/service_decorators.py +21 -21
  189. pipecat/serializers/genesys.py +0 -95
  190. pipecat/services/google/test-google-chirp.py +0 -45
  191. pipecat/services/openai.py +0 -698
  192. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
  193. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
  194. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
  195. /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/services/vision_service.py CHANGED
@@ -14,7 +14,8 @@ visual content.
 from abc import abstractmethod
 from typing import AsyncGenerator
 
-from pipecat.frames.frames import Frame, VisionImageRawFrame
+from pipecat.frames.frames import Frame, LLMContextFrame
+from pipecat.processors.aggregators.llm_context import LLMContext
 from pipecat.processors.frame_processor import FrameDirection
 from pipecat.services.ai_service import AIService
 
@@ -37,15 +38,15 @@ class VisionService(AIService):
         self._describe_text = None
 
     @abstractmethod
-    async def run_vision(self, frame: VisionImageRawFrame) -> AsyncGenerator[Frame, None]:
-        """Process a vision image frame and generate results.
+    async def run_vision(self, context: LLMContext) -> AsyncGenerator[Frame, None]:
+        """Process the latest image in the context and generate results.
 
         This method must be implemented by subclasses to provide actual computer
         vision functionality such as image description, object detection, or
         visual question answering.
 
         Args:
-            frame: The vision image frame to process, containing image data.
+            context: The context to process, containing image data.
 
         Yields:
             Frame: Frames containing the vision analysis results, typically TextFrame
@@ -65,9 +66,9 @@ class VisionService(AIService):
         """
         await super().process_frame(frame, direction)
 
-        if isinstance(frame, VisionImageRawFrame):
+        if isinstance(frame, LLMContextFrame):
             await self.start_processing_metrics()
-            await self.process_generator(self.run_vision(frame))
+            await self.process_generator(self.run_vision(frame.context))
             await self.stop_processing_metrics()
         else:
             await self.push_frame(frame, direction)
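
Reviewer note: run_vision() now receives the aggregated LLMContext instead of a single VisionImageRawFrame, and the service triggers on LLMContextFrame. A minimal sketch of a subclass under the new signature (EchoVisionService and its yielded text are hypothetical; only the abstract signature and the frame gating above come from this diff):

from typing import AsyncGenerator

from pipecat.frames.frames import Frame, TextFrame
from pipecat.processors.aggregators.llm_context import LLMContext
from pipecat.services.vision_service import VisionService


class EchoVisionService(VisionService):
    """Toy subclass: emits a fixed description for the latest image in the context."""

    async def run_vision(self, context: LLMContext) -> AsyncGenerator[Frame, None]:
        # A real implementation would pull the most recent image out of the
        # context and run a vision model on it; here we just yield a placeholder.
        yield TextFrame("I see an image.")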
pipecat/services/vistaar/llm.py CHANGED
@@ -2,10 +2,11 @@
 
 import asyncio
 import json
+import random
 import time
 import uuid
 from dataclasses import dataclass
-from typing import Any, AsyncGenerator, Dict, Optional
+from typing import Any, AsyncGenerator, Dict, List, Optional
 from urllib.parse import urlencode
 
 import httpx
@@ -13,13 +14,15 @@ from loguru import logger
 from pydantic import BaseModel, Field
 
 from pipecat.frames.frames import (
+    CancelFrame,
+    EndFrame,
     Frame,
+    InterruptionFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesFrame,
     LLMTextFrame,
     LLMUpdateSettingsFrame,
-    StartInterruptionFrame,
 )
 from pipecat.processors.aggregators.llm_response import (
     LLMAssistantAggregatorParams,
@@ -53,12 +56,13 @@ class VistaarLLMService(LLMService):
         source_lang: Source language code (e.g., 'mr' for Marathi, 'hi' for Hindi).
         target_lang: Target language code for responses.
         session_id: Session ID for maintaining conversation context.
-        extra: Additional model-specific parameters.
+        extra: Additional model-specific parameters
     """
 
     source_lang: Optional[str] = Field(default="mr")
     target_lang: Optional[str] = Field(default="mr")
     session_id: Optional[str] = Field(default=None)
+    pre_query_response_phrases: Optional[List[str]] = Field(default_factory=list)
     extra: Optional[Dict[str, Any]] = Field(default_factory=dict)
 
     def __init__(
@@ -68,7 +72,6 @@ class VistaarLLMService(LLMService):
         params: Optional[InputParams] = None,
         timeout: float = 30.0,
         interim_timeout: float = 5.0,
-        interim_message: str = "एक क्षण थांबा, मी बघतो. ",
         **kwargs,
     ):
         """Initialize Vistaar LLM service.
@@ -77,8 +80,7 @@ class VistaarLLMService(LLMService):
             base_url: The base URL for Vistaar API. Defaults to "https://vistaar.kenpath.ai/api".
             params: Input parameters for model configuration and behavior.
             timeout: Request timeout in seconds. Defaults to 30.0 seconds.
-            interim_timeout: Time in seconds before sending interim message. Defaults to 3.0 seconds.
-            interim_message: Message to send if API takes longer than interim_timeout. Defaults to "एक क्षण थांबा, मी बघतो. ".
+            interim_timeout: Time in seconds before sending interim message. Defaults to 5.0 seconds.
             **kwargs: Additional arguments passed to the parent LLMService.
         """
         super().__init__(**kwargs)
@@ -89,10 +91,10 @@ class VistaarLLMService(LLMService):
         self._source_lang = params.source_lang
         self._target_lang = params.target_lang
         self._session_id = params.session_id or str(uuid.uuid4())
+        self._pre_query_response_phrases = params.pre_query_response_phrases or []
         self._extra = params.extra if isinstance(params.extra, dict) else {}
         self._timeout = timeout
         self._interim_timeout = interim_timeout
-        self._interim_message = interim_message
 
         # Create an async HTTP client
         self._client = httpx.AsyncClient(timeout=httpx.Timeout(self._timeout), verify=False)
@@ -103,6 +105,8 @@ class VistaarLLMService(LLMService):
         self._partial_response = []  # Track what was actually sent before interruption
         self._interim_sent = False  # Track if interim message was sent
         self._interim_task = None  # Track interim message task
+        self._interim_completion_event = asyncio.Event()  # Track interim message completion
+        self._interim_in_progress = False  # Track if interim message is being spoken
 
         logger.info(
             f"Vistaar LLM initialized - Base URL: {self._base_url}, Session ID: {self._session_id}, Source Lang: {self._source_lang}, Target Lang: {self._target_lang}, Timeout: {self._timeout}s"
@@ -161,6 +165,10 @@ class VistaarLLMService(LLMService):
         # Set interruption flag
         self._is_interrupted = True
 
+        # Reset interim state on interruption
+        self._interim_in_progress = False
+        self._interim_completion_event.set()  # Unblock any waiting LLM responses
+
         # Cancel interim message task if active
         await self._cancel_interim_message_task(
             "Cancelled interim message task - handling interruption"
@@ -193,11 +201,28 @@ class VistaarLLMService(LLMService):
             if not self._is_interrupted and not self._interim_sent:
                 logger.info(f"Sending interim message after {self._interim_timeout}s timeout")
                 self._interim_sent = True
-                await self.push_frame(LLMTextFrame(text=self._interim_message))
+                self._interim_in_progress = True
+
+                # Use random selection from pre_query_response_phrases if available, otherwise fallback to default
+                if self._pre_query_response_phrases:
+                    message = random.choice(self._pre_query_response_phrases)
+                else:
+                    message = "एक क्षण थांबा, मी बघतो. "
+
+                await self.push_frame(LLMTextFrame(text=message))
+
+                # Wait for estimated TTS duration before marking as complete
+                estimated_tts_duration = max(2.0, len(message) * 0.08)  # ~80ms per character
+                logger.info(f"Waiting {estimated_tts_duration:.2f}s for interim TTS completion")
+                await asyncio.sleep(estimated_tts_duration)
         except asyncio.CancelledError:
             logger.debug("Interim message task cancelled")
         except Exception as e:
             logger.error(f"Error sending interim message: {e}")
+        finally:
+            # Signal that interim message handling is complete
+            self._interim_completion_event.set()
+            self._interim_in_progress = False
 
     async def _stream_response(self, query: str) -> AsyncGenerator[str, None]:
         """Stream response from Vistaar API using Server-Sent Events.
@@ -231,6 +256,8 @@ class VistaarLLMService(LLMService):
         self._is_interrupted = False
         self._partial_response = []
         self._interim_sent = False
+        self._interim_in_progress = False
+        self._interim_completion_event.clear()  # Reset the event for new request
 
         try:
             # Use httpx to handle SSE streaming
@@ -291,6 +318,7 @@ class VistaarLLMService(LLMService):
 
         # Start response
        await self.push_frame(LLMFullResponseStartFrame())
+        await self.push_frame(LLMFullResponseStartFrame(), FrameDirection.UPSTREAM)
        await self.start_processing_metrics()
        await self.start_ttfb_metrics()
 
@@ -307,6 +335,15 @@ class VistaarLLMService(LLMService):
             if first_chunk:
                 await self.stop_ttfb_metrics()
                 first_chunk = False
+
+                # Wait for interim message to complete if it was sent and is in progress
+                if self._interim_sent:
+                    logger.debug(
+                        "Waiting for interim message completion before sending LLM response"
+                    )
+                    await self._interim_completion_event.wait()
+                    logger.debug("Interim message completed, proceeding with LLM response")
+
                 # Cancel interim message task since we got first response
                 await self._cancel_interim_message_task(
                     "Cancelled interim message task - got first response"
@@ -334,6 +371,7 @@ class VistaarLLMService(LLMService):
         )
         await self.stop_processing_metrics()
         await self.push_frame(LLMFullResponseEndFrame())
+        await self.push_frame(LLMFullResponseEndFrame(), FrameDirection.UPSTREAM)
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
         """Process frames for LLM completion requests.
@@ -353,7 +391,7 @@ class VistaarLLMService(LLMService):
             )
             await self.push_frame(frame, direction)
             return
-        elif isinstance(frame, StartInterruptionFrame):
+        elif isinstance(frame, InterruptionFrame):
             await self._handle_interruption()
             await self.push_frame(frame, direction)
             return
@@ -426,3 +464,7 @@ class VistaarLLMService(LLMService):
             await self.cancel_task(self._interim_task)
             self._interim_task = None
         logger.debug(message)
+
+    def can_generate_metrics(self) -> bool:
+        """Check if this service can generate processing metrics."""
+        return True
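
Reviewer note: the interim-message changes above amount to a simple gate. A timer task speaks a random filler phrase from pre_query_response_phrases, sleeps out the phrase's estimated TTS playback time (~80 ms per character, minimum 2 s), and sets an asyncio.Event; the streaming path waits on that event before emitting the first real LLM chunk, so the two never talk over each other. A stripped-down sketch of just that pattern (names and timings here are illustrative, not the service's API):

import asyncio
import random

FILLER_PHRASES = ["One moment, let me check...", "Just a second..."]

async def interim_task(sent: asyncio.Event, done: asyncio.Event, timeout: float = 5.0):
    # After `timeout`, "speak" a random filler phrase, wait out its estimated
    # TTS duration (~80 ms per character, minimum 2 s), then signal completion.
    await asyncio.sleep(timeout)
    sent.set()
    message = random.choice(FILLER_PHRASES)
    print(f"interim: {message}")
    await asyncio.sleep(max(2.0, len(message) * 0.08))
    done.set()

async def main():
    sent, done = asyncio.Event(), asyncio.Event()
    task = asyncio.create_task(interim_task(sent, done))
    await asyncio.sleep(6.0)  # pretend the first LLM chunk took 6 s to arrive
    if sent.is_set():         # the filler already started playing:
        await done.wait()     # wait it out instead of talking over it
    task.cancel()             # no further fillers needed
    print("real response")

asyncio.run(main())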
pipecat/tests/utils.py CHANGED
@@ -128,7 +128,7 @@ async def run_test(
     expected_up_frames: Optional[Sequence[type]] = None,
     ignore_start: bool = True,
     observers: Optional[List[BaseObserver]] = None,
-    start_metadata: Optional[Dict[str, Any]] = None,
+    pipeline_params: Optional[PipelineParams] = None,
     send_end_frame: bool = True,
 ) -> Tuple[Sequence[Frame], Sequence[Frame]]:
     """Run a test pipeline with the specified processor and validate frame flow.
@@ -144,7 +144,7 @@ async def run_test(
         expected_up_frames: Expected frame types flowing upstream (optional).
         ignore_start: Whether to ignore StartFrames in frame validation.
         observers: Optional list of observers to attach to the pipeline.
-        start_metadata: Optional metadata to include with the StartFrame.
+        pipeline_params: Optional pipeline parameters.
         send_end_frame: Whether to send an EndFrame at the end of the test.
 
     Returns:
@@ -154,7 +154,7 @@ async def run_test(
         AssertionError: If the received frames don't match the expected frame types.
     """
     observers = observers or []
-    start_metadata = start_metadata or {}
+    pipeline_params = pipeline_params or PipelineParams()
 
     received_up = asyncio.Queue()
     received_down = asyncio.Queue()
@@ -173,7 +173,7 @@ async def run_test(
 
     task = PipelineTask(
         pipeline,
-        params=PipelineParams(start_metadata=start_metadata),
+        params=pipeline_params,
         observers=observers,
         cancel_on_idle_timeout=False,
     )
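
Reviewer note: tests that previously passed start_metadata to run_test() now build the PipelineParams object themselves, which also exposes every other PipelineParams option to tests. A migration sketch (assuming PipelineParams still accepts a start_metadata keyword, as the removed call above suggests; the metadata values are made up):

from pipecat.pipeline.task import PipelineParams

# Before (0.0.82):
#     await run_test(processor, ..., start_metadata={"session": "abc"})

# After (0.0.85): construct the params object and hand it over whole.
params = PipelineParams(start_metadata={"session": "abc"})
#     await run_test(processor, ..., pipeline_params=params)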
pipecat/transcriptions/language.py CHANGED
@@ -68,6 +68,9 @@ class Language(StrEnum):
     AS = "as"
     AS_IN = "as-IN"
 
+    # Asturian
+    AST = "ast"
+
     # Azerbaijani
     AZ = "az"
     AZ_AZ = "az-AZ"
@@ -101,6 +104,9 @@ class Language(StrEnum):
     CA = "ca"
     CA_ES = "ca-ES"
 
+    # Cebuano
+    CEB = "ceb"
+
     # Mandarin Chinese
     CMN = "cmn"
     CMN_CN = "cmn-CN"
@@ -185,6 +191,9 @@ class Language(StrEnum):
     FA = "fa"
     FA_IR = "fa-IR"
 
+    # Fulah
+    FF = "ff"
+
     # Finnish
     FI = "fi"
     FI_FI = "fi-FI"
@@ -251,6 +260,9 @@ class Language(StrEnum):
     ID = "id"
     ID_ID = "id-ID"
 
+    # Igbo
+    IG = "ig"
+
     # Icelandic
     IS = "is"
     IS_IS = "is-IS"
@@ -279,6 +291,9 @@ class Language(StrEnum):
     KA = "ka"
     KA_GE = "ka-GE"
 
+    # Kabuverdianu
+    KEA = "kea"
+
     # Kazakh
     KK = "kk"
     KK_KZ = "kk-KZ"
@@ -295,6 +310,13 @@ class Language(StrEnum):
     KO = "ko"
     KO_KR = "ko-KR"
 
+    # Kurdish
+    KU = "ku"
+
+    # Kyrgyz
+    KY = "ky"
+    KY_KG = "ky-KG"
+
     # Latin
     LA = "la"
 
@@ -312,6 +334,12 @@ class Language(StrEnum):
     LT = "lt"
     LT_LT = "lt-LT"
 
+    # Ganda
+    LG = "lg"
+
+    # Luo
+    LUO = "luo"
+
     # Latvian
     LV = "lv"
     LV_LV = "lv-LV"
@@ -366,6 +394,12 @@ class Language(StrEnum):
     NL_BE = "nl-BE"
     NL_NL = "nl-NL"
 
+    # Northern Sotho
+    NSO = "nso"
+
+    # Chichewa
+    NY = "ny"
+
     # Occitan
     OC = "oc"
 
@@ -484,6 +518,9 @@ class Language(StrEnum):
     UK = "uk"
     UK_UA = "uk-UA"
 
+    # Umbundu
+    UMB = "umb"
+
     # Urdu
     UR = "ur"
     UR_IN = "ur-IN"
@@ -497,6 +534,9 @@ class Language(StrEnum):
     VI = "vi"
     VI_VN = "vi-VN"
 
+    # Wolof
+    WO = "wo"
+
     # Wu Chinese
     WUU = "wuu"
     WUU_CN = "wuu-CN"
@@ -507,7 +547,7 @@ class Language(StrEnum):
     # Yoruba
     YO = "yo"
 
-    # Yue Chinese
+    # Yue Chinese (Cantonese)
     YUE = "yue"
     YUE_CN = "yue-CN"
 
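Reviewer note: because Language is a StrEnum, each new member compares and formats as its plain string code, so existing string-based plumbing picks these additions up unchanged. For example (member choice arbitrary):

from pipecat.transcriptions.language import Language

assert Language.CEB == "ceb"            # a new member equals its code
assert Language.KY_KG.value == "ky-KG"  # regional variants carry the full tag
print(f"lang={Language.WO}")            # prints: lang=wo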
pipecat/transports/base_input.py CHANGED
@@ -11,7 +11,6 @@ input processing, including VAD, turn analysis, and interruption management.
 """
 
 import asyncio
-from concurrent.futures import ThreadPoolExecutor
 from typing import Optional
 
 from loguru import logger
@@ -22,7 +21,6 @@ from pipecat.audio.turn.base_turn_analyzer import (
 )
 from pipecat.audio.vad.vad_analyzer import VADAnalyzer, VADState
 from pipecat.frames.frames import (
-    BotInterruptionFrame,
     BotStartedSpeakingFrame,
     BotStoppedSpeakingFrame,
     CancelFrame,
@@ -36,7 +34,6 @@ from pipecat.frames.frames import (
     MetricsFrame,
     SpeechControlParamsFrame,
     StartFrame,
-    StartInterruptionFrame,
     StopFrame,
     SystemFrame,
     UserSpeakingFrame,
@@ -81,10 +78,6 @@ class BaseInputTransport(FrameProcessor):
         # Track user speaking state for interruption logic
         self._user_speaking = False
 
-        # We read audio from a single queue one at a time and we then run VAD in
-        # a thread. Therefore, only one thread should be necessary.
-        self._executor = ThreadPoolExecutor(max_workers=1)
-
         # Task to process incoming audio (VAD) and push audio frames downstream
         # if passthrough is enabled.
         self._audio_task = None
@@ -239,6 +232,9 @@ class BaseInputTransport(FrameProcessor):
         """
         # Cancel and wait for the audio input task to finish.
         await self._cancel_audio_task()
+        # Stop audio filter.
+        if self._params.audio_in_filter:
+            await self._params.audio_in_filter.stop()
 
     async def set_transport_ready(self, frame: StartFrame):
         """Called when the transport is ready to stream.
@@ -289,8 +285,6 @@ class BaseInputTransport(FrameProcessor):
         elif isinstance(frame, CancelFrame):
             await self.cancel(frame)
             await self.push_frame(frame, direction)
-        elif isinstance(frame, BotInterruptionFrame):
-            await self._handle_bot_interruption(frame)
         elif isinstance(frame, BotStartedSpeakingFrame):
             await self._handle_bot_started_speaking(frame)
             await self.push_frame(frame, direction)
@@ -298,15 +292,14 @@ class BaseInputTransport(FrameProcessor):
             await self._handle_bot_stopped_speaking(frame)
             await self.push_frame(frame, direction)
         elif isinstance(frame, EmulateUserStartedSpeakingFrame):
-            logger.debug("Emulating user started speaking")
+            self.logger.debug("Emulating user started speaking")
             await self._handle_user_interruption(VADState.SPEAKING, emulated=True)
         elif isinstance(frame, EmulateUserStoppedSpeakingFrame):
-            logger.debug("Emulating user stopped speaking")
+            self.logger.debug("Emulating user stopped speaking")
             await self._handle_user_interruption(VADState.QUIET, emulated=True)
-        # All other system frames
         elif isinstance(frame, VADParamsUpdateFrame):
             if self.vad_analyzer:
-                self.vad_analyzer.set_params(frame.params, bot_logger=self.logger)
+                self.vad_analyzer.set_params(frame.params)
                 speech_frame = SpeechControlParamsFrame(
                     vad_params=frame.params,
                     turn_params=self._params.turn_analyzer.params
@@ -314,6 +307,8 @@ class BaseInputTransport(FrameProcessor):
                     else None,
                 )
                 await self.push_frame(speech_frame)
+            await self.push_frame(frame, direction)
+        # All other system frames
         elif isinstance(frame, SystemFrame):
             await self.push_frame(frame, direction)
         # Control frames
@@ -335,13 +330,6 @@ class BaseInputTransport(FrameProcessor):
         # Handle interruptions
         #
 
-    async def _handle_bot_interruption(self, frame: BotInterruptionFrame):
-        """Handle bot interruption frames."""
-        self.logger.debug("Bot interruption")
-        if self.interruptions_allowed:
-            await self._start_interruption()
-            await self.push_frame(StartInterruptionFrame())
-
     async def _handle_user_interruption(self, vad_state: VADState, emulated: bool = False):
         """Handle user interruption events based on speaking state."""
         if vad_state == VADState.SPEAKING:
@@ -353,7 +341,7 @@ class BaseInputTransport(FrameProcessor):
             await self.push_frame(downstream_frame)
             await self.push_frame(upstream_frame, FrameDirection.UPSTREAM)
 
-            # Only push StartInterruptionFrame if:
+            # Only push InterruptionFrame if:
             # 1. No interruption config is set, OR
             # 2. Interruption config is set but bot is not speaking
             should_push_immediate_interruption = (
@@ -362,13 +350,9 @@
 
             # Make sure we notify about interruptions quickly out-of-band.
             if should_push_immediate_interruption and self.interruptions_allowed:
-                await self._start_interruption()
-                # Push an out-of-band frame (i.e. not using the ordered push
-                # frame task) to stop everything, specially at the output
-                # transport.
-                await self.push_frame(StartInterruptionFrame())
+                await self.push_interruption_task_frame_and_wait()
             elif self.interruption_strategies and self._bot_speaking:
-                logger.debug(
+                self.logger.debug(
                     "User started speaking while bot is speaking with interruption config - "
                     "deferring interruption to aggregator"
                 )
@@ -381,9 +365,6 @@
             await self.push_frame(downstream_frame)
             await self.push_frame(upstream_frame, FrameDirection.UPSTREAM)
 
-        if self.interruptions_allowed:
-            await self._stop_interruption()
-
     #
     # Handle bot speaking state
     #
@@ -416,9 +397,7 @@
         """Analyze audio frame for voice activity."""
         state = VADState.QUIET
         if self.vad_analyzer:
-            state = await self.get_event_loop().run_in_executor(
-                self._executor, self.vad_analyzer.analyze_audio, audio_frame.audio
-            )
+            state = await self.vad_analyzer.analyze_audio(audio_frame.audio)
         return state
 
     async def _handle_vad(self, audio_frame: InputAudioRawFrame, vad_state: VADState) -> VADState:
@@ -511,7 +490,7 @@
                 self._audio_in_queue.task_done()
             except asyncio.TimeoutError:
                 if self._user_speaking:
-                    logger.warning(
+                    self.logger.warning(
                         "Forcing user stopped speaking due to timeout receiving audio frame!"
                     )
                     vad_state = VADState.QUIET
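
Reviewer note: the transport no longer owns a single-thread ThreadPoolExecutor for VAD; VADAnalyzer.analyze_audio is now awaited directly (matching the vad_analyzer.py change in this release), so responsibility for keeping the event loop unblocked moves into the analyzer. A sketch of what that implies for a custom analyzer (the class below is hypothetical; only the awaited call shape comes from this diff):

import asyncio

class MyVADAnalyzer:
    async def analyze_audio(self, audio: bytes) -> str:
        # analyze_audio is now a coroutine, so a heavy, blocking model call
        # should be pushed off the event loop explicitly, e.g. via
        # asyncio.to_thread(), rather than relying on the transport's executor.
        return await asyncio.to_thread(self._blocking_inference, audio)

    def _blocking_inference(self, audio: bytes) -> str:
        # Stand-in for real VAD inference.
        return "SPEAKING" if any(audio) else "QUIET"

async def main():
    state = await MyVADAnalyzer().analyze_audio(b"\x00\x01")
    print(state)  # -> SPEAKING

asyncio.run(main())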