dv-pipecat-ai 0.0.82.dev815__py3-none-any.whl → 0.0.82.dev857__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/METADATA +8 -3
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/RECORD +106 -79
- pipecat/adapters/base_llm_adapter.py +44 -6
- pipecat/adapters/services/anthropic_adapter.py +302 -2
- pipecat/adapters/services/aws_nova_sonic_adapter.py +40 -2
- pipecat/adapters/services/bedrock_adapter.py +40 -2
- pipecat/adapters/services/gemini_adapter.py +276 -6
- pipecat/adapters/services/open_ai_adapter.py +88 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +39 -1
- pipecat/audio/dtmf/__init__.py +0 -0
- pipecat/audio/dtmf/types.py +47 -0
- pipecat/audio/dtmf/utils.py +70 -0
- pipecat/audio/filters/aic_filter.py +199 -0
- pipecat/audio/utils.py +9 -7
- pipecat/extensions/ivr/__init__.py +0 -0
- pipecat/extensions/ivr/ivr_navigator.py +452 -0
- pipecat/frames/frames.py +156 -43
- pipecat/pipeline/llm_switcher.py +76 -0
- pipecat/pipeline/parallel_pipeline.py +3 -3
- pipecat/pipeline/service_switcher.py +144 -0
- pipecat/pipeline/task.py +68 -28
- pipecat/pipeline/task_observer.py +10 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -2
- pipecat/processors/aggregators/llm_context.py +277 -0
- pipecat/processors/aggregators/llm_response.py +48 -15
- pipecat/processors/aggregators/llm_response_universal.py +840 -0
- pipecat/processors/aggregators/openai_llm_context.py +3 -3
- pipecat/processors/dtmf_aggregator.py +0 -2
- pipecat/processors/filters/stt_mute_filter.py +0 -2
- pipecat/processors/frame_processor.py +18 -11
- pipecat/processors/frameworks/rtvi.py +17 -10
- pipecat/processors/metrics/sentry.py +2 -0
- pipecat/runner/daily.py +137 -36
- pipecat/runner/run.py +1 -1
- pipecat/runner/utils.py +7 -7
- pipecat/serializers/asterisk.py +20 -4
- pipecat/serializers/exotel.py +1 -1
- pipecat/serializers/plivo.py +1 -1
- pipecat/serializers/telnyx.py +1 -1
- pipecat/serializers/twilio.py +1 -1
- pipecat/services/__init__.py +2 -2
- pipecat/services/anthropic/llm.py +113 -28
- pipecat/services/asyncai/tts.py +4 -0
- pipecat/services/aws/llm.py +82 -8
- pipecat/services/aws/tts.py +0 -10
- pipecat/services/aws_nova_sonic/aws.py +5 -0
- pipecat/services/cartesia/tts.py +28 -16
- pipecat/services/cerebras/llm.py +15 -10
- pipecat/services/deepgram/stt.py +8 -0
- pipecat/services/deepseek/llm.py +13 -8
- pipecat/services/fireworks/llm.py +13 -8
- pipecat/services/fish/tts.py +8 -6
- pipecat/services/gemini_multimodal_live/gemini.py +5 -0
- pipecat/services/gladia/config.py +7 -1
- pipecat/services/gladia/stt.py +23 -15
- pipecat/services/google/llm.py +159 -59
- pipecat/services/google/llm_openai.py +18 -3
- pipecat/services/grok/llm.py +2 -1
- pipecat/services/llm_service.py +38 -3
- pipecat/services/mem0/memory.py +2 -1
- pipecat/services/mistral/llm.py +5 -6
- pipecat/services/nim/llm.py +2 -1
- pipecat/services/openai/base_llm.py +88 -26
- pipecat/services/openai/image.py +6 -1
- pipecat/services/openai_realtime_beta/openai.py +5 -2
- pipecat/services/openpipe/llm.py +6 -8
- pipecat/services/perplexity/llm.py +13 -8
- pipecat/services/playht/tts.py +9 -6
- pipecat/services/rime/tts.py +1 -1
- pipecat/services/sambanova/llm.py +18 -13
- pipecat/services/sarvam/tts.py +415 -10
- pipecat/services/speechmatics/stt.py +2 -2
- pipecat/services/tavus/video.py +1 -1
- pipecat/services/tts_service.py +15 -5
- pipecat/services/vistaar/llm.py +2 -5
- pipecat/transports/base_input.py +32 -19
- pipecat/transports/base_output.py +39 -5
- pipecat/transports/daily/__init__.py +0 -0
- pipecat/transports/daily/transport.py +2371 -0
- pipecat/transports/daily/utils.py +410 -0
- pipecat/transports/livekit/__init__.py +0 -0
- pipecat/transports/livekit/transport.py +1042 -0
- pipecat/transports/network/fastapi_websocket.py +12 -546
- pipecat/transports/network/small_webrtc.py +12 -922
- pipecat/transports/network/webrtc_connection.py +9 -595
- pipecat/transports/network/websocket_client.py +12 -481
- pipecat/transports/network/websocket_server.py +12 -487
- pipecat/transports/services/daily.py +9 -2334
- pipecat/transports/services/helpers/daily_rest.py +12 -396
- pipecat/transports/services/livekit.py +12 -975
- pipecat/transports/services/tavus.py +12 -757
- pipecat/transports/smallwebrtc/__init__.py +0 -0
- pipecat/transports/smallwebrtc/connection.py +612 -0
- pipecat/transports/smallwebrtc/transport.py +936 -0
- pipecat/transports/tavus/__init__.py +0 -0
- pipecat/transports/tavus/transport.py +770 -0
- pipecat/transports/websocket/__init__.py +0 -0
- pipecat/transports/websocket/client.py +494 -0
- pipecat/transports/websocket/fastapi.py +559 -0
- pipecat/transports/websocket/server.py +500 -0
- pipecat/transports/whatsapp/__init__.py +0 -0
- pipecat/transports/whatsapp/api.py +345 -0
- pipecat/transports/whatsapp/client.py +364 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev815.dist-info → dv_pipecat_ai-0.0.82.dev857.dist-info}/top_level.txt +0 -0
pipecat/transports/base_input.py
CHANGED
@@ -38,8 +38,8 @@ from pipecat.frames.frames import (
     StartFrame,
     StartInterruptionFrame,
     StopFrame,
-    StopInterruptionFrame,
     SystemFrame,
+    UserSpeakingFrame,
     UserStartedSpeakingFrame,
     UserStoppedSpeakingFrame,
     VADParamsUpdateFrame,
@@ -298,11 +298,11 @@ class BaseInputTransport(FrameProcessor):
             await self._handle_bot_stopped_speaking(frame)
             await self.push_frame(frame, direction)
         elif isinstance(frame, EmulateUserStartedSpeakingFrame):
-
-            await self._handle_user_interruption(
+            logger.debug("Emulating user started speaking")
+            await self._handle_user_interruption(VADState.SPEAKING, emulated=True)
         elif isinstance(frame, EmulateUserStoppedSpeakingFrame):
-
-            await self._handle_user_interruption(
+            logger.debug("Emulating user stopped speaking")
+            await self._handle_user_interruption(VADState.QUIET, emulated=True)
         # All other system frames
         elif isinstance(frame, VADParamsUpdateFrame):
             if self.vad_analyzer:
@@ -342,12 +342,16 @@ class BaseInputTransport(FrameProcessor):
         await self._start_interruption()
         await self.push_frame(StartInterruptionFrame())

-    async def _handle_user_interruption(self,
+    async def _handle_user_interruption(self, vad_state: VADState, emulated: bool = False):
         """Handle user interruption events based on speaking state."""
-        if
+        if vad_state == VADState.SPEAKING:
             self.logger.debug("User started speaking")
             self._user_speaking = True
-
+
+            upstream_frame = UserStartedSpeakingFrame(emulated=emulated)
+            downstream_frame = UserStartedSpeakingFrame(emulated=emulated)
+            await self.push_frame(downstream_frame)
+            await self.push_frame(upstream_frame, FrameDirection.UPSTREAM)

             # Only push StartInterruptionFrame if:
             # 1. No interruption config is set, OR
@@ -368,13 +372,17 @@ class BaseInputTransport(FrameProcessor):
                 "User started speaking while bot is speaking with interruption config - "
                 "deferring interruption to aggregator"
             )
-        elif
+        elif vad_state == VADState.QUIET:
             self.logger.debug("User stopped speaking")
             self._user_speaking = False
-
+
+            upstream_frame = UserStoppedSpeakingFrame(emulated=emulated)
+            downstream_frame = UserStoppedSpeakingFrame(emulated=emulated)
+            await self.push_frame(downstream_frame)
+            await self.push_frame(upstream_frame, FrameDirection.UPSTREAM)
+
             if self.interruptions_allowed:
                 await self._stop_interruption()
-                await self.push_frame(StopInterruptionFrame())

         #
         # Handle bot speaking state
@@ -413,7 +421,7 @@ class BaseInputTransport(FrameProcessor):
         )
         return state

-    async def _handle_vad(self, audio_frame: InputAudioRawFrame, vad_state: VADState):
+    async def _handle_vad(self, audio_frame: InputAudioRawFrame, vad_state: VADState) -> VADState:
         """Handle Voice Activity Detection results and generate appropriate frames."""
         new_vad_state = await self._vad_analyze(audio_frame)
         if (
@@ -421,7 +429,8 @@ class BaseInputTransport(FrameProcessor):
             and new_vad_state != VADState.STARTING
             and new_vad_state != VADState.STOPPING
         ):
-
+            interruption_state = None
+
             # If the turn analyser is enabled, this will prevent:
             # - Creating the UserStoppedSpeakingFrame
             # - Creating the UserStartedSpeakingFrame multiple times
@@ -432,14 +441,14 @@ class BaseInputTransport(FrameProcessor):
             if new_vad_state == VADState.SPEAKING:
                 await self.push_frame(VADUserStartedSpeakingFrame())
                 if can_create_user_frames:
-
+                    interruption_state = VADState.SPEAKING
             elif new_vad_state == VADState.QUIET:
                 await self.push_frame(VADUserStoppedSpeakingFrame())
                 if can_create_user_frames:
-
+                    interruption_state = VADState.QUIET

-            if
-                await self._handle_user_interruption(
+            if interruption_state:
+                await self._handle_user_interruption(interruption_state)

             vad_state = new_vad_state
         return vad_state
@@ -454,7 +463,7 @@ class BaseInputTransport(FrameProcessor):
    async def _handle_end_of_turn_complete(self, state: EndOfTurnState):
        """Handle completion of end-of-turn analysis."""
        if state == EndOfTurnState.COMPLETE:
-            await self._handle_user_interruption(
+            await self._handle_user_interruption(VADState.QUIET)

    async def _run_turn_analyzer(
        self, frame: InputAudioRawFrame, vad_state: VADState, previous_vad_state: VADState
@@ -491,6 +500,10 @@ class BaseInputTransport(FrameProcessor):
        if self._params.turn_analyzer:
            await self._run_turn_analyzer(frame, vad_state, previous_vad_state)

+        if vad_state == VADState.SPEAKING:
+            await self.push_frame(UserSpeakingFrame())
+            await self.push_frame(UserSpeakingFrame(), FrameDirection.UPSTREAM)
+
        # Push audio downstream if passthrough is set.
        if self._params.audio_in_passthrough:
            await self.push_frame(frame)
@@ -504,7 +517,7 @@ class BaseInputTransport(FrameProcessor):
            vad_state = VADState.QUIET
            if self._params.turn_analyzer:
                self._params.turn_analyzer.clear()
-            await self._handle_user_interruption(
+            await self._handle_user_interruption(VADState.QUIET)

    async def _handle_prediction_result(self, result: MetricsData):
        """Handle a prediction result event from the turn analyzer."""
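Taken together, the base_input.py changes collapse three call sites (real VAD transitions, the EmulateUser*SpeakingFrame handlers, and end-of-turn completion) into a single _handle_user_interruption(vad_state, emulated=...) entry point, retire StopInterruptionFrame, and push the started/stopped frames both downstream and upstream. Below is a minimal, self-contained sketch of that pattern, using stand-in classes rather than pipecat's real ones:

import asyncio
from dataclasses import dataclass
from enum import Enum, auto


class VADState(Enum):
    QUIET = auto()
    STARTING = auto()
    SPEAKING = auto()
    STOPPING = auto()


class FrameDirection(Enum):
    DOWNSTREAM = auto()
    UPSTREAM = auto()


@dataclass
class UserStartedSpeakingFrame:
    emulated: bool = False


@dataclass
class UserStoppedSpeakingFrame:
    emulated: bool = False


class InputTransportSketch:
    """Stand-in for BaseInputTransport, reduced to the interruption path."""

    def __init__(self):
        self._user_speaking = False

    async def push_frame(self, frame, direction=FrameDirection.DOWNSTREAM):
        print(f"{direction.name}: {frame}")

    async def _handle_user_interruption(self, vad_state: VADState, emulated: bool = False):
        # One entry point for real VAD results, emulated speaking frames, and
        # end-of-turn completion; the old per-call-site branches are gone.
        if vad_state == VADState.SPEAKING:
            self._user_speaking = True
            frame_cls = UserStartedSpeakingFrame
        elif vad_state == VADState.QUIET:
            self._user_speaking = False
            frame_cls = UserStoppedSpeakingFrame
        else:
            return  # STARTING/STOPPING never reach this method
        # As in the diff, separate instances travel downstream and upstream.
        await self.push_frame(frame_cls(emulated=emulated))
        await self.push_frame(frame_cls(emulated=emulated), FrameDirection.UPSTREAM)


asyncio.run(InputTransportSketch()._handle_user_interruption(VADState.SPEAKING, emulated=True))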
pipecat/transports/base_output.py
CHANGED
@@ -19,6 +19,7 @@ from typing import Any, AsyncGenerator, Dict, List, Mapping, Optional
 from loguru import logger
 from PIL import Image

+from pipecat.audio.dtmf.utils import load_dtmf_audio
 from pipecat.audio.mixers.base_audio_mixer import BaseAudioMixer
 from pipecat.audio.utils import create_stream_resampler, is_silence
 from pipecat.frames.frames import (
@@ -28,6 +29,7 @@ from pipecat.frames.frames import (
     CancelFrame,
     EndFrame,
     Frame,
+    InputTransportMessageUrgentFrame,
     MixerControlFrame,
     OutputAudioRawFrame,
     OutputDTMFFrame,
@@ -38,7 +40,6 @@ from pipecat.frames.frames import (
     SpriteFrame,
     StartFrame,
     StartInterruptionFrame,
-    StopInterruptionFrame,
     SystemFrame,
     TransportMessageFrame,
     TransportMessageUrgentFrame,
@@ -219,12 +220,43 @@ class BaseOutputTransport(FrameProcessor):
         pass

     async def write_dtmf(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame):
-        """Write a DTMF tone
+        """Write a DTMF tone using the transport's preferred method.

         Args:
             frame: The DTMF frame to write.
         """
-
+        if self._supports_native_dtmf():
+            await self._write_dtmf_native(frame)
+        else:
+            await self._write_dtmf_audio(frame)
+
+    def _supports_native_dtmf(self) -> bool:
+        """Override in transport implementations that support native DTMF.
+
+        Returns:
+            True if the transport supports native DTMF, False otherwise.
+        """
+        return False
+
+    async def _write_dtmf_native(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame):
+        """Override in transport implementations for native DTMF.
+
+        Args:
+            frame: The DTMF frame to write.
+        """
+        raise NotImplementedError("Transport claims native DTMF support but doesn't implement it")
+
+    async def _write_dtmf_audio(self, frame: OutputDTMFFrame | OutputDTMFUrgentFrame):
+        """Generate and send audio tones for DTMF.
+
+        Args:
+            frame: The DTMF frame to write.
+        """
+        dtmf_audio = await load_dtmf_audio(frame.button, sample_rate=self._sample_rate)
+        dtmf_audio_frame = OutputAudioRawFrame(
+            audio=dtmf_audio, sample_rate=self._sample_rate, num_channels=1
+        )
+        await self.write_audio_frame(dtmf_audio_frame)

     async def send_audio(self, frame: OutputAudioRawFrame):
         """Send an audio frame downstream.
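The rewritten write_dtmf is a template method: the base class checks _supports_native_dtmf() and dispatches to _write_dtmf_native() when a transport can signal the keypress itself, otherwise it synthesizes the tone via load_dtmf_audio and writes it as raw audio. A runnable sketch of that dispatch with stand-in classes follows; only the hook names come from the diff, while NativeDtmfTransport and its print calls are hypothetical:

import asyncio


class OutputTransportSketch:
    """Stand-in for BaseOutputTransport's DTMF template method."""

    async def write_dtmf(self, button: str):
        if self._supports_native_dtmf():
            await self._write_dtmf_native(button)
        else:
            await self._write_dtmf_audio(button)

    def _supports_native_dtmf(self) -> bool:
        # Default: no native support, fall back to synthesized tones.
        return False

    async def _write_dtmf_native(self, button: str):
        raise NotImplementedError("Transport claims native DTMF support but doesn't implement it")

    async def _write_dtmf_audio(self, button: str):
        print(f"synthesizing and writing audio tone for '{button}'")


class NativeDtmfTransport(OutputTransportSketch):
    """Hypothetical transport whose provider accepts DTMF events directly."""

    def _supports_native_dtmf(self) -> bool:
        return True

    async def _write_dtmf_native(self, button: str):
        print(f"sending '{button}' over the provider's signaling channel")


asyncio.run(NativeDtmfTransport().write_dtmf("5"))    # native path
asyncio.run(OutputTransportSketch().write_dtmf("5"))  # audio fallback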
@@ -268,10 +300,12 @@
         elif isinstance(frame, CancelFrame):
             await self.cancel(frame)
             await self.push_frame(frame, direction)
-        elif isinstance(frame,
+        elif isinstance(frame, StartInterruptionFrame):
             await self.push_frame(frame, direction)
             await self._handle_frame(frame)
-        elif isinstance(frame, TransportMessageUrgentFrame)
+        elif isinstance(frame, TransportMessageUrgentFrame) and not isinstance(
+            frame, InputTransportMessageUrgentFrame
+        ):
             await self.send_message(frame)
         elif isinstance(frame, OutputDTMFUrgentFrame):
             await self.write_dtmf(frame)
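The guard added to process_frame keeps input-originated urgent messages from being echoed back out of the transport. It implies that InputTransportMessageUrgentFrame subclasses TransportMessageUrgentFrame, so a bare isinstance check would match both; a minimal sketch with stand-in classes:

class TransportMessageUrgentFrame:
    """Stand-in for an outbound urgent transport message."""


class InputTransportMessageUrgentFrame(TransportMessageUrgentFrame):
    """Stand-in for a message that arrived *from* the transport.

    Assumed to subclass the urgent frame, as the added guard implies.
    """


def should_send(frame) -> bool:
    # Mirrors the new condition: send outbound urgent messages, but do not
    # echo input-originated ones back to the transport.
    return isinstance(frame, TransportMessageUrgentFrame) and not isinstance(
        frame, InputTransportMessageUrgentFrame
    )


assert should_send(TransportMessageUrgentFrame())
assert not should_send(InputTransportMessageUrgentFrame())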