dv-pipecat-ai 0.0.82.dev815__py3-none-any.whl → 0.0.82.dev857__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic; see the registry's advisory page for more details.

Files changed (106):
  1. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/METADATA +8 -3
  2. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/RECORD +106 -79
  3. pipecat/adapters/base_llm_adapter.py +44 -6
  4. pipecat/adapters/services/anthropic_adapter.py +302 -2
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
  6. pipecat/adapters/services/bedrock_adapter.py +40 -2
  7. pipecat/adapters/services/gemini_adapter.py +276 -6
  8. pipecat/adapters/services/open_ai_adapter.py +88 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
  10. pipecat/audio/dtmf/__init__.py +0 -0
  11. pipecat/audio/dtmf/types.py +47 -0
  12. pipecat/audio/dtmf/utils.py +70 -0
  13. pipecat/audio/filters/aic_filter.py +199 -0
  14. pipecat/audio/utils.py +9 -7
  15. pipecat/extensions/ivr/__init__.py +0 -0
  16. pipecat/extensions/ivr/ivr_navigator.py +452 -0
  17. pipecat/frames/frames.py +156 -43
  18. pipecat/pipeline/llm_switcher.py +76 -0
  19. pipecat/pipeline/parallel_pipeline.py +3 -3
  20. pipecat/pipeline/service_switcher.py +144 -0
  21. pipecat/pipeline/task.py +68 -28
  22. pipecat/pipeline/task_observer.py +10 -0
  23. pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
  24. pipecat/processors/aggregators/llm_context.py +277 -0
  25. pipecat/processors/aggregators/llm_response.py +48 -15
  26. pipecat/processors/aggregators/llm_response_universal.py +840 -0
  27. pipecat/processors/aggregators/openai_llm_context.py +3 -3
  28. pipecat/processors/dtmf_aggregator.py +0 -2
  29. pipecat/processors/filters/stt_mute_filter.py +0 -2
  30. pipecat/processors/frame_processor.py +18 -11
  31. pipecat/processors/frameworks/rtvi.py +17 -10
  32. pipecat/processors/metrics/sentry.py +2 -0
  33. pipecat/runner/daily.py +137 -36
  34. pipecat/runner/run.py +1 -1
  35. pipecat/runner/utils.py +7 -7
  36. pipecat/serializers/asterisk.py +20 -4
  37. pipecat/serializers/exotel.py +1 -1
  38. pipecat/serializers/plivo.py +1 -1
  39. pipecat/serializers/telnyx.py +1 -1
  40. pipecat/serializers/twilio.py +1 -1
  41. pipecat/services/__init__.py +2 -2
  42. pipecat/services/anthropic/llm.py +113 -28
  43. pipecat/services/asyncai/tts.py +4 -0
  44. pipecat/services/aws/llm.py +82 -8
  45. pipecat/services/aws/tts.py +0 -10
  46. pipecat/services/aws_nova_sonic/aws.py +5 -0
  47. pipecat/services/cartesia/tts.py +28 -16
  48. pipecat/services/cerebras/llm.py +15 -10
  49. pipecat/services/deepgram/stt.py +8 -0
  50. pipecat/services/deepseek/llm.py +13 -8
  51. pipecat/services/fireworks/llm.py +13 -8
  52. pipecat/services/fish/tts.py +8 -6
  53. pipecat/services/gemini_multimodal_live/gemini.py +5 -0
  54. pipecat/services/gladia/config.py +7 -1
  55. pipecat/services/gladia/stt.py +23 -15
  56. pipecat/services/google/llm.py +159 -59
  57. pipecat/services/google/llm_openai.py +18 -3
  58. pipecat/services/grok/llm.py +2 -1
  59. pipecat/services/llm_service.py +38 -3
  60. pipecat/services/mem0/memory.py +2 -1
  61. pipecat/services/mistral/llm.py +5 -6
  62. pipecat/services/nim/llm.py +2 -1
  63. pipecat/services/openai/base_llm.py +88 -26
  64. pipecat/services/openai/image.py +6 -1
  65. pipecat/services/openai_realtime_beta/openai.py +5 -2
  66. pipecat/services/openpipe/llm.py +6 -8
  67. pipecat/services/perplexity/llm.py +13 -8
  68. pipecat/services/playht/tts.py +9 -6
  69. pipecat/services/rime/tts.py +1 -1
  70. pipecat/services/sambanova/llm.py +18 -13
  71. pipecat/services/sarvam/tts.py +415 -10
  72. pipecat/services/speechmatics/stt.py +2 -2
  73. pipecat/services/tavus/video.py +1 -1
  74. pipecat/services/tts_service.py +15 -5
  75. pipecat/services/vistaar/llm.py +2 -5
  76. pipecat/transports/base_input.py +32 -19
  77. pipecat/transports/base_output.py +39 -5
  78. pipecat/transports/daily/__init__.py +0 -0
  79. pipecat/transports/daily/transport.py +2371 -0
  80. pipecat/transports/daily/utils.py +410 -0
  81. pipecat/transports/livekit/__init__.py +0 -0
  82. pipecat/transports/livekit/transport.py +1042 -0
  83. pipecat/transports/network/fastapi_websocket.py +12 -546
  84. pipecat/transports/network/small_webrtc.py +12 -922
  85. pipecat/transports/network/webrtc_connection.py +9 -595
  86. pipecat/transports/network/websocket_client.py +12 -481
  87. pipecat/transports/network/websocket_server.py +12 -487
  88. pipecat/transports/services/daily.py +9 -2334
  89. pipecat/transports/services/helpers/daily_rest.py +12 -396
  90. pipecat/transports/services/livekit.py +12 -975
  91. pipecat/transports/services/tavus.py +12 -757
  92. pipecat/transports/smallwebrtc/__init__.py +0 -0
  93. pipecat/transports/smallwebrtc/connection.py +612 -0
  94. pipecat/transports/smallwebrtc/transport.py +936 -0
  95. pipecat/transports/tavus/__init__.py +0 -0
  96. pipecat/transports/tavus/transport.py +770 -0
  97. pipecat/transports/websocket/__init__.py +0 -0
  98. pipecat/transports/websocket/client.py +494 -0
  99. pipecat/transports/websocket/fastapi.py +559 -0
  100. pipecat/transports/websocket/server.py +500 -0
  101. pipecat/transports/whatsapp/__init__.py +0 -0
  102. pipecat/transports/whatsapp/api.py +345 -0
  103. pipecat/transports/whatsapp/client.py +364 -0
  104. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/WHEEL +0 -0
  105. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/licenses/LICENSE +0 -0
  106. {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/top_level.txt +0 -0
@@ -38,8 +38,8 @@ from pipecat.frames.frames import (
38
38
  StartFrame,
39
39
  StartInterruptionFrame,
40
40
  StopFrame,
41
- StopInterruptionFrame,
42
41
  SystemFrame,
42
+ UserSpeakingFrame,
43
43
  UserStartedSpeakingFrame,
44
44
  UserStoppedSpeakingFrame,
45
45
  VADParamsUpdateFrame,
@@ -298,11 +298,11 @@ class BaseInputTransport(FrameProcessor):
298
298
  await self._handle_bot_stopped_speaking(frame)
299
299
  await self.push_frame(frame, direction)
300
300
  elif isinstance(frame, EmulateUserStartedSpeakingFrame):
301
- self.logger.debug("Emulating user started speaking")
302
- await self._handle_user_interruption(UserStartedSpeakingFrame(emulated=True))
301
+ logger.debug("Emulating user started speaking")
302
+ await self._handle_user_interruption(VADState.SPEAKING, emulated=True)
303
303
  elif isinstance(frame, EmulateUserStoppedSpeakingFrame):
304
- self.logger.debug("Emulating user stopped speaking")
305
- await self._handle_user_interruption(UserStoppedSpeakingFrame(emulated=True))
304
+ logger.debug("Emulating user stopped speaking")
305
+ await self._handle_user_interruption(VADState.QUIET, emulated=True)
306
306
  # All other system frames
307
307
  elif isinstance(frame, VADParamsUpdateFrame):
308
308
  if self.vad_analyzer:
@@ -342,12 +342,16 @@ class BaseInputTransport(FrameProcessor):
342
342
  await self._start_interruption()
343
343
  await self.push_frame(StartInterruptionFrame())
344
344
 
345
- async def _handle_user_interruption(self, frame: Frame):
345
+ async def _handle_user_interruption(self, vad_state: VADState, emulated: bool = False):
346
346
  """Handle user interruption events based on speaking state."""
347
- if isinstance(frame, UserStartedSpeakingFrame):
347
+ if vad_state == VADState.SPEAKING:
348
348
  self.logger.debug("User started speaking")
349
349
  self._user_speaking = True
350
- await self.push_frame(frame)
350
+
351
+ upstream_frame = UserStartedSpeakingFrame(emulated=emulated)
352
+ downstream_frame = UserStartedSpeakingFrame(emulated=emulated)
353
+ await self.push_frame(downstream_frame)
354
+ await self.push_frame(upstream_frame, FrameDirection.UPSTREAM)
351
355
 
352
356
  # Only push StartInterruptionFrame if:
353
357
  # 1. No interruption config is set, OR
@@ -368,13 +372,17 @@ class BaseInputTransport(FrameProcessor):
368
372
  "User started speaking while bot is speaking with interruption config - "
369
373
  "deferring interruption to aggregator"
370
374
  )
371
- elif isinstance(frame, UserStoppedSpeakingFrame):
375
+ elif vad_state == VADState.QUIET:
372
376
  self.logger.debug("User stopped speaking")
373
377
  self._user_speaking = False
374
- await self.push_frame(frame)
378
+
379
+ upstream_frame = UserStoppedSpeakingFrame(emulated=emulated)
380
+ downstream_frame = UserStoppedSpeakingFrame(emulated=emulated)
381
+ await self.push_frame(downstream_frame)
382
+ await self.push_frame(upstream_frame, FrameDirection.UPSTREAM)
383
+
375
384
  if self.interruptions_allowed:
376
385
  await self._stop_interruption()
377
- await self.push_frame(StopInterruptionFrame())
378
386
 
379
387
  #
380
388
  # Handle bot speaking state
@@ -413,7 +421,7 @@ class BaseInputTransport(FrameProcessor):
413
421
  )
414
422
  return state
415
423
 
416
- async def _handle_vad(self, audio_frame: InputAudioRawFrame, vad_state: VADState):
424
+ async def _handle_vad(self, audio_frame: InputAudioRawFrame, vad_state: VADState) -> VADState:
417
425
  """Handle Voice Activity Detection results and generate appropriate frames."""
418
426
  new_vad_state = await self._vad_analyze(audio_frame)
419
427
  if (
@@ -421,7 +429,8 @@ class BaseInputTransport(FrameProcessor):
421
429
  and new_vad_state != VADState.STARTING
422
430
  and new_vad_state != VADState.STOPPING
423
431
  ):
424
- frame = None
432
+ interruption_state = None
433
+
425
434
  # If the turn analyser is enabled, this will prevent:
426
435
  # - Creating the UserStoppedSpeakingFrame
427
436
  # - Creating the UserStartedSpeakingFrame multiple times
@@ -432,14 +441,14 @@ class BaseInputTransport(FrameProcessor):
432
441
  if new_vad_state == VADState.SPEAKING:
433
442
  await self.push_frame(VADUserStartedSpeakingFrame())
434
443
  if can_create_user_frames:
435
- frame = UserStartedSpeakingFrame()
444
+ interruption_state = VADState.SPEAKING
436
445
  elif new_vad_state == VADState.QUIET:
437
446
  await self.push_frame(VADUserStoppedSpeakingFrame())
438
447
  if can_create_user_frames:
439
- frame = UserStoppedSpeakingFrame()
448
+ interruption_state = VADState.QUIET
440
449
 
441
- if frame:
442
- await self._handle_user_interruption(frame)
450
+ if interruption_state:
451
+ await self._handle_user_interruption(interruption_state)
443
452
 
444
453
  vad_state = new_vad_state
445
454
  return vad_state
@@ -454,7 +463,7 @@ class BaseInputTransport(FrameProcessor):
454
463
  async def _handle_end_of_turn_complete(self, state: EndOfTurnState):
455
464
  """Handle completion of end-of-turn analysis."""
456
465
  if state == EndOfTurnState.COMPLETE:
457
- await self._handle_user_interruption(UserStoppedSpeakingFrame())
466
+ await self._handle_user_interruption(VADState.QUIET)
458
467
 
459
468
  async def _run_turn_analyzer(
460
469
  self, frame: InputAudioRawFrame, vad_state: VADState, previous_vad_state: VADState
@@ -491,6 +500,10 @@ class BaseInputTransport(FrameProcessor):
491
500
  if self._params.turn_analyzer:
492
501
  await self._run_turn_analyzer(frame, vad_state, previous_vad_state)
493
502
 
503
+ if vad_state == VADState.SPEAKING:
504
+ await self.push_frame(UserSpeakingFrame())
505
+ await self.push_frame(UserSpeakingFrame(), FrameDirection.UPSTREAM)
506
+
494
507
  # Push audio downstream if passthrough is set.
495
508
  if self._params.audio_in_passthrough:
496
509
  await self.push_frame(frame)
@@ -504,7 +517,7 @@ class BaseInputTransport(FrameProcessor):
504
517
  vad_state = VADState.QUIET
505
518
  if self._params.turn_analyzer:
506
519
  self._params.turn_analyzer.clear()
507
- await self._handle_user_interruption(UserStoppedSpeakingFrame())
520
+ await self._handle_user_interruption(VADState.QUIET)
508
521
 
509
522
  async def _handle_prediction_result(self, result: MetricsData):
510
523
  """Handle a prediction result event from the turn analyzer."""
@@ -19,6 +19,7 @@ from typing import Any, AsyncGenerator, Dict, List, Mapping, Optional
19
19
  from loguru import logger
20
20
  from PIL import Image
21
21
 
22
+ from pipecat.audio.dtmf.utils import load_dtmf_audio
22
23
  from pipecat.audio.mixers.base_audio_mixer import BaseAudioMixer
23
24
  from pipecat.audio.utils import create_stream_resampler, is_silence
24
25
  from pipecat.frames.frames import (
@@ -28,6 +29,7 @@ from pipecat.frames.frames import (
28
29
  CancelFrame,
29
30
  EndFrame,
30
31
  Frame,
32
+ InputTransportMessageUrgentFrame,
31
33
  MixerControlFrame,
32
34
  OutputAudioRawFrame,
33
35
  OutputDTMFFrame,
@@ -38,7 +40,6 @@ from pipecat.frames.frames import (
38
40
  SpriteFrame,
39
41
  StartFrame,
40
42
  StartInterruptionFrame,
41
- StopInterruptionFrame,
42
43
  SystemFrame,
43
44
  TransportMessageFrame,
44
45
  TransportMessageUrgentFrame,
@@ -219,12 +220,43 @@ class BaseOutputTransport(FrameProcessor):
219
220
  pass
220
221
 
221
222
  async def write_dtmf(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame):
222
- """Write a DTMF tone to the transport.
223
+ """Write a DTMF tone using the transport's preferred method.
223
224
 
224
225
  Args:
225
226
  frame: The DTMF frame to write.
226
227
  """
227
- pass
228
+ if self._supports_native_dtmf():
229
+ await self._write_dtmf_native(frame)
230
+ else:
231
+ await self._write_dtmf_audio(frame)
232
+
233
+ def _supports_native_dtmf(self) -> bool:
234
+ """Override in transport implementations that support native DTMF.
235
+
236
+ Returns:
237
+ True if the transport supports native DTMF, False otherwise.
238
+ """
239
+ return False
240
+
241
+ async def _write_dtmf_native(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame):
242
+ """Override in transport implementations for native DTMF.
243
+
244
+ Args:
245
+ frame: The DTMF frame to write.
246
+ """
247
+ raise NotImplementedError("Transport claims native DTMF support but doesn't implement it")
248
+
249
+ async def _write_dtmf_audio(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame):
250
+ """Generate and send audio tones for DTMF.
251
+
252
+ Args:
253
+ frame: The DTMF frame to write.
254
+ """
255
+ dtmf_audio = await load_dtmf_audio(frame.button, sample_rate=self._sample_rate)
256
+ dtmf_audio_frame = OutputAudioRawFrame(
257
+ audio=dtmf_audio, sample_rate=self._sample_rate, num_channels=1
258
+ )
259
+ await self.write_audio_frame(dtmf_audio_frame)
228
260
 
229
261
  async def send_audio(self, frame: OutputAudioRawFrame):
230
262
  """Send an audio frame downstream.
@@ -268,10 +300,12 @@ class BaseOutputTransport(FrameProcessor):
268
300
  elif isinstance(frame, CancelFrame):
269
301
  await self.cancel(frame)
270
302
  await self.push_frame(frame, direction)
271
- elif isinstance(frame, (StartInterruptionFrame, StopInterruptionFrame)):
303
+ elif isinstance(frame, StartInterruptionFrame):
272
304
  await self.push_frame(frame, direction)
273
305
  await self._handle_frame(frame)
274
- elif isinstance(frame, TransportMessageUrgentFrame):
306
+ elif isinstance(frame, TransportMessageUrgentFrame) and not isinstance(
307
+ frame, InputTransportMessageUrgentFrame
308
+ ):
275
309
  await self.send_message(frame)
276
310
  elif isinstance(frame, OutputDTMFUrgentFrame):
277
311
  await self.write_dtmf(frame)
Remaining files listed above with +0 -0 are unchanged between the two versions.