dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
- pipecat/audio/vad/silero.py +9 -3
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +277 -86
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +18 -6
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +125 -79
- pipecat/pipeline/tts_switcher.py +30 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_context.py +40 -2
- pipecat/processors/aggregators/llm_response.py +32 -15
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/dtmf_aggregator.py +174 -77
- pipecat/processors/filters/stt_mute_filter.py +17 -0
- pipecat/processors/frame_processor.py +110 -24
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +210 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +26 -5
- pipecat/processors/user_idle_processor.py +35 -11
- pipecat/runner/daily.py +59 -20
- pipecat/runner/run.py +395 -93
- pipecat/runner/types.py +6 -4
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/__init__.py +5 -1
- pipecat/serializers/asterisk.py +16 -2
- pipecat/serializers/convox.py +41 -4
- pipecat/serializers/custom.py +257 -0
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +5 -5
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/serializers/vi.py +324 -0
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/assemblyai/models.py +6 -0
- pipecat/services/assemblyai/stt.py +13 -5
- pipecat/services/asyncai/tts.py +5 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +147 -105
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +436 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1265 -0
- pipecat/services/aws/stt.py +3 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +8 -354
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/llm.py +51 -1
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/stt.py +77 -70
- pipecat/services/cartesia/tts.py +80 -13
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +640 -0
- pipecat/services/elevenlabs/__init__.py +4 -1
- pipecat/services/elevenlabs/stt.py +339 -0
- pipecat/services/elevenlabs/tts.py +87 -46
- pipecat/services/fish/tts.py +5 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/stt.py +4 -0
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +4 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +5 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +49 -10
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/piper/tts.py +7 -9
- pipecat/services/playht/tts.py +34 -4
- pipecat/services/rime/tts.py +12 -12
- pipecat/services/riva/stt.py +3 -1
- pipecat/services/salesforce/__init__.py +9 -0
- pipecat/services/salesforce/llm.py +700 -0
- pipecat/services/sarvam/__init__.py +7 -0
- pipecat/services/sarvam/stt.py +540 -0
- pipecat/services/sarvam/tts.py +97 -13
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +22 -10
- pipecat/services/stt_service.py +47 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +75 -22
- pipecat/services/vision_service.py +7 -6
- pipecat/services/vistaar/llm.py +51 -9
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +13 -34
- pipecat/transports/base_output.py +140 -104
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +103 -19
- pipecat/transports/smallwebrtc/request_handler.py +246 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/redis.py +58 -0
- pipecat/utils/string.py +13 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- pipecat/serializers/genesys.py +0 -95
- pipecat/services/google/test-google-chirp.py +0 -45
- pipecat/services/openai.py +0 -698
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
|
@@ -11,7 +11,9 @@ data structures for voice activity detection in audio streams. Includes state
|
|
|
11
11
|
management, parameter configuration, and audio analysis framework.
|
|
12
12
|
"""
|
|
13
13
|
|
|
14
|
+
import asyncio
|
|
14
15
|
from abc import ABC, abstractmethod
|
|
16
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
15
17
|
from enum import Enum
|
|
16
18
|
from typing import Optional
|
|
17
19
|
|
|
@@ -85,6 +87,10 @@ class VADAnalyzer(ABC):
|
|
|
85
87
|
self._smoothing_factor = 0.2
|
|
86
88
|
self._prev_volume = 0
|
|
87
89
|
|
|
90
|
+
# Thread executor that will run the model. We only need one thread per
|
|
91
|
+
# analyzer because one analyzer just handles one audio stream.
|
|
92
|
+
self._executor = ThreadPoolExecutor(max_workers=1)
|
|
93
|
+
|
|
88
94
|
@property
|
|
89
95
|
def sample_rate(self) -> int:
|
|
90
96
|
"""Get the current sample rate.
|
|
@@ -161,7 +167,7 @@ class VADAnalyzer(ABC):
|
|
|
161
167
|
volume = calculate_audio_volume(audio, self.sample_rate)
|
|
162
168
|
return exp_smoothing(volume, self._prev_volume, self._smoothing_factor)
|
|
163
169
|
|
|
164
|
-
def analyze_audio(self, buffer) -> VADState:
|
|
170
|
+
async def analyze_audio(self, buffer: bytes) -> VADState:
|
|
165
171
|
"""Analyze audio buffer and return current VAD state.
|
|
166
172
|
|
|
167
173
|
Processes incoming audio data, maintains internal state, and determines
|
|
@@ -173,6 +179,12 @@ class VADAnalyzer(ABC):
|
|
|
173
179
|
Returns:
|
|
174
180
|
Current VAD state after processing the buffer.
|
|
175
181
|
"""
|
|
182
|
+
loop = asyncio.get_running_loop()
|
|
183
|
+
state = await loop.run_in_executor(self._executor, self._run_analyzer, buffer)
|
|
184
|
+
return state
|
|
185
|
+
|
|
186
|
+
def _run_analyzer(self, buffer: bytes) -> VADState:
|
|
187
|
+
"""Analyze audio buffer and return current VAD state."""
|
|
176
188
|
self._vad_buffer += buffer
|
|
177
189
|
|
|
178
190
|
num_required_bytes = self._vad_frames_num_bytes
|
|
@@ -21,7 +21,6 @@ from typing import List, Optional
|
|
|
21
21
|
from loguru import logger
|
|
22
22
|
|
|
23
23
|
from pipecat.frames.frames import (
|
|
24
|
-
BotInterruptionFrame,
|
|
25
24
|
EndFrame,
|
|
26
25
|
Frame,
|
|
27
26
|
LLMFullResponseEndFrame,
|
|
@@ -37,7 +36,8 @@ from pipecat.frames.frames import (
|
|
|
37
36
|
UserStoppedSpeakingFrame,
|
|
38
37
|
)
|
|
39
38
|
from pipecat.pipeline.parallel_pipeline import ParallelPipeline
|
|
40
|
-
from pipecat.processors.aggregators.
|
|
39
|
+
from pipecat.processors.aggregators.llm_context import LLMContext
|
|
40
|
+
from pipecat.processors.aggregators.llm_response_universal import LLMContextAggregatorPair
|
|
41
41
|
from pipecat.processors.frame_processor import FrameDirection, FrameProcessor, FrameProcessorSetup
|
|
42
42
|
from pipecat.services.llm_service import LLMService
|
|
43
43
|
from pipecat.sync.base_notifier import BaseNotifier
|
|
@@ -360,7 +360,7 @@ class ClassificationProcessor(FrameProcessor):
|
|
|
360
360
|
await self._voicemail_notifier.notify() # Clear buffered TTS frames
|
|
361
361
|
|
|
362
362
|
# Interrupt the current pipeline to stop any ongoing processing
|
|
363
|
-
await self.
|
|
363
|
+
await self.push_interruption_task_frame_and_wait()
|
|
364
364
|
|
|
365
365
|
# Set the voicemail event to trigger the voicemail handler
|
|
366
366
|
self._voicemail_event.clear()
|
|
@@ -615,8 +615,8 @@ VOICEMAIL SYSTEM (respond "VOICEMAIL"):
|
|
|
615
615
|
]
|
|
616
616
|
|
|
617
617
|
# Create the LLM context and aggregators for conversation management
|
|
618
|
-
self._context =
|
|
619
|
-
self._context_aggregator =
|
|
618
|
+
self._context = LLMContext(self._messages)
|
|
619
|
+
self._context_aggregator = LLMContextAggregatorPair(self._context)
|
|
620
620
|
|
|
621
621
|
# Create notification system for coordinating between components
|
|
622
622
|
self._gate_notifier = EventNotifier() # Signals classification completion
|
pipecat/frames/frames.py
CHANGED
|
@@ -451,12 +451,14 @@ class TranscriptionMessage:
|
|
|
451
451
|
content: The message content/text.
|
|
452
452
|
user_id: Optional identifier for the user.
|
|
453
453
|
timestamp: Optional timestamp when the message was created.
|
|
454
|
+
message_id: Optional unique identifier for tracking and dropping messages.
|
|
454
455
|
"""
|
|
455
456
|
|
|
456
457
|
role: Literal["user", "assistant"]
|
|
457
458
|
content: str
|
|
458
459
|
user_id: Optional[str] = None
|
|
459
460
|
timestamp: Optional[str] = None
|
|
461
|
+
message_id: Optional[int] = None
|
|
460
462
|
|
|
461
463
|
|
|
462
464
|
@dataclass
|
|
@@ -510,6 +512,17 @@ class TranscriptionUpdateFrame(DataFrame):
|
|
|
510
512
|
return f"{self.name}(pts: {pts}, messages: {len(self.messages)})"
|
|
511
513
|
|
|
512
514
|
|
|
515
|
+
@dataclass
|
|
516
|
+
class TranscriptDropFrame(DataFrame):
|
|
517
|
+
"""Frame indicating previously emitted transcript chunks should be discarded.
|
|
518
|
+
|
|
519
|
+
Parameters:
|
|
520
|
+
transcript_ids: List of frame/message identifiers to drop.
|
|
521
|
+
"""
|
|
522
|
+
|
|
523
|
+
transcript_ids: List[int]
|
|
524
|
+
|
|
525
|
+
|
|
513
526
|
@dataclass
|
|
514
527
|
class LLMContextFrame(Frame):
|
|
515
528
|
"""Frame containing a universal LLM context.
|
|
@@ -672,7 +685,7 @@ class TTSSpeakFrame(DataFrame):
|
|
|
672
685
|
|
|
673
686
|
|
|
674
687
|
@dataclass
|
|
675
|
-
class TransportMessageFrame(DataFrame):
|
|
688
|
+
class OutputTransportMessageFrame(DataFrame):
|
|
676
689
|
"""Frame containing transport-specific message data.
|
|
677
690
|
|
|
678
691
|
Parameters:
|
|
@@ -685,6 +698,32 @@ class TransportMessageFrame(DataFrame):
|
|
|
685
698
|
return f"{self.name}(message: {self.message})"
|
|
686
699
|
|
|
687
700
|
|
|
701
|
+
@dataclass
|
|
702
|
+
class TransportMessageFrame(OutputTransportMessageFrame):
|
|
703
|
+
"""Frame containing transport-specific message data.
|
|
704
|
+
|
|
705
|
+
.. deprecated:: 0.0.87
|
|
706
|
+
This frame is deprecated and will be removed in a future version.
|
|
707
|
+
Instead, use `OutputTransportMessageFrame`.
|
|
708
|
+
|
|
709
|
+
Parameters:
|
|
710
|
+
message: The transport message payload.
|
|
711
|
+
"""
|
|
712
|
+
|
|
713
|
+
def __post_init__(self):
|
|
714
|
+
super().__post_init__()
|
|
715
|
+
import warnings
|
|
716
|
+
|
|
717
|
+
with warnings.catch_warnings():
|
|
718
|
+
warnings.simplefilter("always")
|
|
719
|
+
warnings.warn(
|
|
720
|
+
"TransportMessageFrame is deprecated and will be removed in a future version. "
|
|
721
|
+
"Instead, use OutputTransportMessageFrame.",
|
|
722
|
+
DeprecationWarning,
|
|
723
|
+
stacklevel=2,
|
|
724
|
+
)
|
|
725
|
+
|
|
726
|
+
|
|
688
727
|
@dataclass
|
|
689
728
|
class DTMFFrame:
|
|
690
729
|
"""Base class for DTMF (Dual-Tone Multi-Frequency) keypad frames.
|
|
@@ -788,43 +827,6 @@ class FatalErrorFrame(ErrorFrame):
|
|
|
788
827
|
fatal: bool = field(default=True, init=False)
|
|
789
828
|
|
|
790
829
|
|
|
791
|
-
@dataclass
|
|
792
|
-
class EndTaskFrame(SystemFrame):
|
|
793
|
-
"""Frame to request graceful pipeline task closure.
|
|
794
|
-
|
|
795
|
-
This is used to notify the pipeline task that the pipeline should be
|
|
796
|
-
closed nicely (flushing all the queued frames) by pushing an EndFrame
|
|
797
|
-
downstream. This frame should be pushed upstream.
|
|
798
|
-
"""
|
|
799
|
-
|
|
800
|
-
pass
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
@dataclass
|
|
804
|
-
class CancelTaskFrame(SystemFrame):
|
|
805
|
-
"""Frame to request immediate pipeline task cancellation.
|
|
806
|
-
|
|
807
|
-
This is used to notify the pipeline task that the pipeline should be
|
|
808
|
-
stopped immediately by pushing a CancelFrame downstream. This frame
|
|
809
|
-
should be pushed upstream.
|
|
810
|
-
"""
|
|
811
|
-
|
|
812
|
-
pass
|
|
813
|
-
|
|
814
|
-
|
|
815
|
-
@dataclass
|
|
816
|
-
class StopTaskFrame(SystemFrame):
|
|
817
|
-
"""Frame to request pipeline task stop while keeping processors running.
|
|
818
|
-
|
|
819
|
-
This is used to notify the pipeline task that it should be stopped as
|
|
820
|
-
soon as possible (flushing all the queued frames) but that the pipeline
|
|
821
|
-
processors should be kept in a running state. This frame should be pushed
|
|
822
|
-
upstream.
|
|
823
|
-
"""
|
|
824
|
-
|
|
825
|
-
pass
|
|
826
|
-
|
|
827
|
-
|
|
828
830
|
@dataclass
|
|
829
831
|
class FrameProcessorPauseUrgentFrame(SystemFrame):
|
|
830
832
|
"""Frame to pause frame processing immediately.
|
|
@@ -857,7 +859,7 @@ class FrameProcessorResumeUrgentFrame(SystemFrame):
|
|
|
857
859
|
|
|
858
860
|
|
|
859
861
|
@dataclass
|
|
860
|
-
class StartInterruptionFrame(SystemFrame):
|
|
862
|
+
class InterruptionFrame(SystemFrame):
|
|
861
863
|
"""Frame indicating user started speaking (interruption detected).
|
|
862
864
|
|
|
863
865
|
Emitted by the BaseInputTransport to indicate that a user has started
|
|
@@ -869,6 +871,34 @@ class StartInterruptionFrame(SystemFrame):
|
|
|
869
871
|
pass
|
|
870
872
|
|
|
871
873
|
|
|
874
|
+
@dataclass
|
|
875
|
+
class StartInterruptionFrame(InterruptionFrame):
|
|
876
|
+
"""Frame indicating user started speaking (interruption detected).
|
|
877
|
+
|
|
878
|
+
.. deprecated:: 0.0.85
|
|
879
|
+
This frame is deprecated and will be removed in a future version.
|
|
880
|
+
Instead, use `InterruptionFrame`.
|
|
881
|
+
|
|
882
|
+
Emitted by the BaseInputTransport to indicate that a user has started
|
|
883
|
+
speaking (i.e. is interrupting). This is similar to
|
|
884
|
+
UserStartedSpeakingFrame except that it should be pushed concurrently
|
|
885
|
+
with other frames (so the order is not guaranteed).
|
|
886
|
+
"""
|
|
887
|
+
|
|
888
|
+
def __post_init__(self):
|
|
889
|
+
super().__post_init__()
|
|
890
|
+
import warnings
|
|
891
|
+
|
|
892
|
+
with warnings.catch_warnings():
|
|
893
|
+
warnings.simplefilter("always")
|
|
894
|
+
warnings.warn(
|
|
895
|
+
"StartInterruptionFrame is deprecated and will be removed in a future version. "
|
|
896
|
+
"Instead, use InterruptionFrame.",
|
|
897
|
+
DeprecationWarning,
|
|
898
|
+
stacklevel=2,
|
|
899
|
+
)
|
|
900
|
+
|
|
901
|
+
|
|
872
902
|
@dataclass
|
|
873
903
|
class UserStartedSpeakingFrame(SystemFrame):
|
|
874
904
|
"""Frame indicating user has started speaking.
|
|
@@ -944,20 +974,6 @@ class VADUserStoppedSpeakingFrame(SystemFrame):
|
|
|
944
974
|
pass
|
|
945
975
|
|
|
946
976
|
|
|
947
|
-
@dataclass
|
|
948
|
-
class BotInterruptionFrame(SystemFrame):
|
|
949
|
-
"""Frame indicating the bot should be interrupted.
|
|
950
|
-
|
|
951
|
-
Emitted when the bot should be interrupted. This will mainly cause the
|
|
952
|
-
same actions as if the user interrupted except that the
|
|
953
|
-
UserStartedSpeakingFrame and UserStoppedSpeakingFrame won't be generated.
|
|
954
|
-
This frame should be pushed upstreams. It results in the BaseInputTransport
|
|
955
|
-
starting an interruption by pushing a StartInterruptionFrame downstream.
|
|
956
|
-
"""
|
|
957
|
-
|
|
958
|
-
pass
|
|
959
|
-
|
|
960
|
-
|
|
961
977
|
@dataclass
|
|
962
978
|
class BotStartedSpeakingFrame(SystemFrame):
|
|
963
979
|
"""Frame indicating the bot started speaking.
|
|
@@ -1115,8 +1131,8 @@ class STTMuteFrame(SystemFrame):
|
|
|
1115
1131
|
|
|
1116
1132
|
|
|
1117
1133
|
@dataclass
|
|
1118
|
-
class TransportMessageUrgentFrame(SystemFrame):
|
|
1119
|
-
"""Frame for
|
|
1134
|
+
class InputTransportMessageFrame(SystemFrame):
|
|
1135
|
+
"""Frame for transport messages received from external sources.
|
|
1120
1136
|
|
|
1121
1137
|
Parameters:
|
|
1122
1138
|
message: The urgent transport message payload.
|
|
@@ -1129,20 +1145,69 @@ class TransportMessageUrgentFrame(SystemFrame):
|
|
|
1129
1145
|
|
|
1130
1146
|
|
|
1131
1147
|
@dataclass
|
|
1132
|
-
class InputTransportMessageUrgentFrame(
|
|
1148
|
+
class InputTransportMessageUrgentFrame(InputTransportMessageFrame):
|
|
1133
1149
|
"""Frame for transport messages received from external sources.
|
|
1134
1150
|
|
|
1135
|
-
|
|
1136
|
-
|
|
1137
|
-
|
|
1138
|
-
TransportMessageFrame while marking the message as having been received
|
|
1139
|
-
rather than generated locally.
|
|
1151
|
+
.. deprecated:: 0.0.87
|
|
1152
|
+
This frame is deprecated and will be removed in a future version.
|
|
1153
|
+
Instead, use `InputTransportMessageFrame`.
|
|
1140
1154
|
|
|
1141
|
-
|
|
1142
|
-
|
|
1155
|
+
Parameters:
|
|
1156
|
+
message: The urgent transport message payload.
|
|
1143
1157
|
"""
|
|
1144
1158
|
|
|
1145
|
-
|
|
1159
|
+
def __post_init__(self):
|
|
1160
|
+
super().__post_init__()
|
|
1161
|
+
import warnings
|
|
1162
|
+
|
|
1163
|
+
with warnings.catch_warnings():
|
|
1164
|
+
warnings.simplefilter("always")
|
|
1165
|
+
warnings.warn(
|
|
1166
|
+
"InputTransportMessageUrgentFrame is deprecated and will be removed in a future version. "
|
|
1167
|
+
"Instead, use InputTransportMessageFrame.",
|
|
1168
|
+
DeprecationWarning,
|
|
1169
|
+
stacklevel=2,
|
|
1170
|
+
)
|
|
1171
|
+
|
|
1172
|
+
|
|
1173
|
+
@dataclass
|
|
1174
|
+
class OutputTransportMessageUrgentFrame(SystemFrame):
|
|
1175
|
+
"""Frame for urgent transport messages that need to be sent immediately.
|
|
1176
|
+
|
|
1177
|
+
Parameters:
|
|
1178
|
+
message: The urgent transport message payload.
|
|
1179
|
+
"""
|
|
1180
|
+
|
|
1181
|
+
message: Any
|
|
1182
|
+
|
|
1183
|
+
def __str__(self):
|
|
1184
|
+
return f"{self.name}(message: {self.message})"
|
|
1185
|
+
|
|
1186
|
+
|
|
1187
|
+
@dataclass
|
|
1188
|
+
class TransportMessageUrgentFrame(OutputTransportMessageUrgentFrame):
|
|
1189
|
+
"""Frame for urgent transport messages that need to be sent immediately.
|
|
1190
|
+
|
|
1191
|
+
.. deprecated:: 0.0.87
|
|
1192
|
+
This frame is deprecated and will be removed in a future version.
|
|
1193
|
+
Instead, use `OutputTransportMessageUrgentFrame`.
|
|
1194
|
+
|
|
1195
|
+
Parameters:
|
|
1196
|
+
message: The urgent transport message payload.
|
|
1197
|
+
"""
|
|
1198
|
+
|
|
1199
|
+
def __post_init__(self):
|
|
1200
|
+
super().__post_init__()
|
|
1201
|
+
import warnings
|
|
1202
|
+
|
|
1203
|
+
with warnings.catch_warnings():
|
|
1204
|
+
warnings.simplefilter("always")
|
|
1205
|
+
warnings.warn(
|
|
1206
|
+
"TransportMessageUrgentFrame is deprecated and will be removed in a future version. "
|
|
1207
|
+
"Instead, use OutputTransportMessageFrame.",
|
|
1208
|
+
DeprecationWarning,
|
|
1209
|
+
stacklevel=2,
|
|
1210
|
+
)
|
|
1146
1211
|
|
|
1147
1212
|
|
|
1148
1213
|
@dataclass
|
|
@@ -1253,23 +1318,6 @@ class UserImageRawFrame(InputImageRawFrame):
|
|
|
1253
1318
|
return f"{self.name}(pts: {pts}, user: {self.user_id}, source: {self.transport_source}, size: {self.size}, format: {self.format}, request: {self.request})"
|
|
1254
1319
|
|
|
1255
1320
|
|
|
1256
|
-
@dataclass
|
|
1257
|
-
class VisionImageRawFrame(InputImageRawFrame):
|
|
1258
|
-
"""Image frame for vision/image analysis with associated text prompt.
|
|
1259
|
-
|
|
1260
|
-
An image with an associated text to ask for a description of it.
|
|
1261
|
-
|
|
1262
|
-
Parameters:
|
|
1263
|
-
text: Optional text prompt describing what to analyze in the image.
|
|
1264
|
-
"""
|
|
1265
|
-
|
|
1266
|
-
text: Optional[str] = None
|
|
1267
|
-
|
|
1268
|
-
def __str__(self):
|
|
1269
|
-
pts = format_pts(self.pts)
|
|
1270
|
-
return f"{self.name}(pts: {pts}, text: [{self.text}], size: {self.size}, format: {self.format})"
|
|
1271
|
-
|
|
1272
|
-
|
|
1273
1321
|
@dataclass
|
|
1274
1322
|
class InputDTMFFrame(DTMFFrame, SystemFrame):
|
|
1275
1323
|
"""DTMF keypress input frame from transport."""
|
|
@@ -1306,6 +1354,116 @@ class SpeechControlParamsFrame(SystemFrame):
|
|
|
1306
1354
|
turn_params: Optional[SmartTurnParams] = None
|
|
1307
1355
|
|
|
1308
1356
|
|
|
1357
|
+
@dataclass
|
|
1358
|
+
class StartDTMFCaptureFrame(SystemFrame):
|
|
1359
|
+
"""System frame indicating the bot is actively collecting DTMF input."""
|
|
1360
|
+
|
|
1361
|
+
pass
|
|
1362
|
+
|
|
1363
|
+
|
|
1364
|
+
@dataclass
|
|
1365
|
+
class EndDTMFCaptureFrame(SystemFrame):
|
|
1366
|
+
"""System frame indicating DTMF collection has finished."""
|
|
1367
|
+
|
|
1368
|
+
pass
|
|
1369
|
+
|
|
1370
|
+
|
|
1371
|
+
# Task frames
|
|
1372
|
+
#
|
|
1373
|
+
|
|
1374
|
+
|
|
1375
|
+
@dataclass
|
|
1376
|
+
class TaskFrame(SystemFrame):
|
|
1377
|
+
"""Base frame for task frames.
|
|
1378
|
+
|
|
1379
|
+
This is a base class for frames that are meant to be sent and handled
|
|
1380
|
+
upstream by the pipeline task. This might result in a corresponding frame
|
|
1381
|
+
sent downstream (e.g. `InterruptionTaskFrame` / `InterruptionFrame` or
|
|
1382
|
+
`EndTaskFrame` / `EndFrame`).
|
|
1383
|
+
|
|
1384
|
+
"""
|
|
1385
|
+
|
|
1386
|
+
pass
|
|
1387
|
+
|
|
1388
|
+
|
|
1389
|
+
@dataclass
|
|
1390
|
+
class EndTaskFrame(TaskFrame):
|
|
1391
|
+
"""Frame to request graceful pipeline task closure.
|
|
1392
|
+
|
|
1393
|
+
This is used to notify the pipeline task that the pipeline should be
|
|
1394
|
+
closed nicely (flushing all the queued frames) by pushing an EndFrame
|
|
1395
|
+
downstream. This frame should be pushed upstream.
|
|
1396
|
+
"""
|
|
1397
|
+
|
|
1398
|
+
pass
|
|
1399
|
+
|
|
1400
|
+
|
|
1401
|
+
@dataclass
|
|
1402
|
+
class CancelTaskFrame(TaskFrame):
|
|
1403
|
+
"""Frame to request immediate pipeline task cancellation.
|
|
1404
|
+
|
|
1405
|
+
This is used to notify the pipeline task that the pipeline should be
|
|
1406
|
+
stopped immediately by pushing a CancelFrame downstream. This frame
|
|
1407
|
+
should be pushed upstream.
|
|
1408
|
+
"""
|
|
1409
|
+
|
|
1410
|
+
pass
|
|
1411
|
+
|
|
1412
|
+
|
|
1413
|
+
@dataclass
|
|
1414
|
+
class StopTaskFrame(TaskFrame):
|
|
1415
|
+
"""Frame to request pipeline task stop while keeping processors running.
|
|
1416
|
+
|
|
1417
|
+
This is used to notify the pipeline task that it should be stopped as
|
|
1418
|
+
soon as possible (flushing all the queued frames) but that the pipeline
|
|
1419
|
+
processors should be kept in a running state. This frame should be pushed
|
|
1420
|
+
upstream.
|
|
1421
|
+
"""
|
|
1422
|
+
|
|
1423
|
+
pass
|
|
1424
|
+
|
|
1425
|
+
|
|
1426
|
+
@dataclass
|
|
1427
|
+
class InterruptionTaskFrame(TaskFrame):
|
|
1428
|
+
"""Frame indicating the bot should be interrupted.
|
|
1429
|
+
|
|
1430
|
+
Emitted when the bot should be interrupted. This will mainly cause the
|
|
1431
|
+
same actions as if the user interrupted except that the
|
|
1432
|
+
UserStartedSpeakingFrame and UserStoppedSpeakingFrame won't be generated.
|
|
1433
|
+
This frame should be pushed upstream.
|
|
1434
|
+
"""
|
|
1435
|
+
|
|
1436
|
+
pass
|
|
1437
|
+
|
|
1438
|
+
|
|
1439
|
+
@dataclass
|
|
1440
|
+
class BotInterruptionFrame(InterruptionTaskFrame):
|
|
1441
|
+
"""Frame indicating the bot should be interrupted.
|
|
1442
|
+
|
|
1443
|
+
.. deprecated:: 0.0.85
|
|
1444
|
+
This frame is deprecated and will be removed in a future version.
|
|
1445
|
+
Instead, use `InterruptionTaskFrame`.
|
|
1446
|
+
|
|
1447
|
+
Emitted when the bot should be interrupted. This will mainly cause the
|
|
1448
|
+
same actions as if the user interrupted except that the
|
|
1449
|
+
UserStartedSpeakingFrame and UserStoppedSpeakingFrame won't be generated.
|
|
1450
|
+
This frame should be pushed upstream.
|
|
1451
|
+
"""
|
|
1452
|
+
|
|
1453
|
+
def __post_init__(self):
|
|
1454
|
+
super().__post_init__()
|
|
1455
|
+
import warnings
|
|
1456
|
+
|
|
1457
|
+
with warnings.catch_warnings():
|
|
1458
|
+
warnings.simplefilter("always")
|
|
1459
|
+
warnings.warn(
|
|
1460
|
+
"BotInterruptionFrame is deprecated and will be removed in a future version. "
|
|
1461
|
+
"Instead, use InterruptionTaskFrame.",
|
|
1462
|
+
DeprecationWarning,
|
|
1463
|
+
stacklevel=2,
|
|
1464
|
+
)
|
|
1465
|
+
|
|
1466
|
+
|
|
1309
1467
|
#
|
|
1310
1468
|
# Control frames
|
|
1311
1469
|
#
|
|
@@ -1473,6 +1631,17 @@ class STTUpdateSettingsFrame(ServiceUpdateSettingsFrame):
|
|
|
1473
1631
|
pass
|
|
1474
1632
|
|
|
1475
1633
|
|
|
1634
|
+
@dataclass
|
|
1635
|
+
class DTMFUpdateSettingsFrame(ServiceUpdateSettingsFrame):
|
|
1636
|
+
"""Frame for updating DTMF aggregator settings.
|
|
1637
|
+
|
|
1638
|
+
Updates DTMF configuration dynamically during conversation flow.
|
|
1639
|
+
Settings can include: timeout, digits, end, reset parameters.
|
|
1640
|
+
"""
|
|
1641
|
+
|
|
1642
|
+
pass
|
|
1643
|
+
|
|
1644
|
+
|
|
1476
1645
|
@dataclass
|
|
1477
1646
|
class VADParamsUpdateFrame(SystemFrame):
|
|
1478
1647
|
"""Frame for updating VAD parameters.
|
|
@@ -1545,23 +1714,34 @@ class MixerEnableFrame(MixerControlFrame):
|
|
|
1545
1714
|
enable: bool
|
|
1546
1715
|
|
|
1547
1716
|
|
|
1717
|
+
@dataclass
|
|
1718
|
+
class ServiceSwitcherFrame(ControlFrame):
|
|
1719
|
+
"""A base class for frames that affect ServiceSwitcher behavior."""
|
|
1720
|
+
|
|
1721
|
+
|
|
1548
1722
|
@dataclass
|
|
1549
1723
|
class StartUserIdleProcessorFrame(SystemFrame):
|
|
1550
1724
|
"""Frame to start the UserIdleProcessor monitoring."""
|
|
1551
1725
|
|
|
1552
1726
|
|
|
1553
1727
|
@dataclass
|
|
1554
|
-
class
|
|
1555
|
-
"""
|
|
1728
|
+
class StopUserIdleProcessorFrame(SystemFrame):
|
|
1729
|
+
"""Frame to stop the UserIdleProcessor monitoring."""
|
|
1556
1730
|
|
|
1557
1731
|
pass
|
|
1558
1732
|
|
|
1559
1733
|
|
|
1560
1734
|
@dataclass
|
|
1561
|
-
class
|
|
1562
|
-
"""Frame to
|
|
1735
|
+
class ResetUserIdleProcessorFrame(SystemFrame):
|
|
1736
|
+
"""Frame signalling the UserIdleProcessor to treat the user as active again.
|
|
1563
1737
|
|
|
1564
|
-
|
|
1738
|
+
Parameters:
|
|
1739
|
+
text: Optional transcription text that triggered the reset.
|
|
1740
|
+
is_final: Whether the transcription was marked final by STT.
|
|
1741
|
+
"""
|
|
1742
|
+
|
|
1743
|
+
text: str = ""
|
|
1744
|
+
is_final: bool = False
|
|
1565
1745
|
|
|
1566
1746
|
|
|
1567
1747
|
@dataclass
|
|
@@ -1571,6 +1751,17 @@ class WaitForDTMFFrame(ControlFrame):
|
|
|
1571
1751
|
pass
|
|
1572
1752
|
|
|
1573
1753
|
|
|
1754
|
+
@dataclass
|
|
1755
|
+
class CallTransferFrame(ControlFrame):
|
|
1756
|
+
"""Frame to request call transfer to another number/destination.
|
|
1757
|
+
|
|
1758
|
+
Parameters:
|
|
1759
|
+
target: The phone number or destination to transfer the call to.
|
|
1760
|
+
"""
|
|
1761
|
+
|
|
1762
|
+
target: str
|
|
1763
|
+
|
|
1764
|
+
|
|
1574
1765
|
@dataclass
|
|
1575
1766
|
class ManuallySwitchServiceFrame(ServiceSwitcherFrame):
|
|
1576
1767
|
"""A frame to request a manual switch in the active service in a ServiceSwitcher.
|
|
@@ -54,7 +54,7 @@ class DebugLogObserver(BaseObserver):
|
|
|
54
54
|
|
|
55
55
|
Log frames with specific source/destination filters::
|
|
56
56
|
|
|
57
|
-
from pipecat.frames.frames import
|
|
57
|
+
from pipecat.frames.frames import InterruptionFrame, UserStartedSpeakingFrame, LLMTextFrame
|
|
58
58
|
from pipecat.observers.loggers.debug_log_observer import DebugLogObserver, FrameEndpoint
|
|
59
59
|
from pipecat.transports.base_output import BaseOutputTransport
|
|
60
60
|
from pipecat.services.stt_service import STTService
|
|
@@ -62,8 +62,8 @@ class DebugLogObserver(BaseObserver):
|
|
|
62
62
|
observers=[
|
|
63
63
|
DebugLogObserver(
|
|
64
64
|
frame_types={
|
|
65
|
-
# Only log
|
|
66
|
-
|
|
65
|
+
# Only log InterruptionFrame when source is BaseOutputTransport
|
|
66
|
+
InterruptionFrame: (BaseOutputTransport, FrameEndpoint.SOURCE),
|
|
67
67
|
# Only log UserStartedSpeakingFrame when destination is STTService
|
|
68
68
|
UserStartedSpeakingFrame: (STTService, FrameEndpoint.DESTINATION),
|
|
69
69
|
# Log LLMTextFrame regardless of source or destination type
|
|
@@ -11,6 +11,7 @@ from loguru import logger
|
|
|
11
11
|
from pipecat.frames.frames import (
|
|
12
12
|
FunctionCallInProgressFrame,
|
|
13
13
|
FunctionCallResultFrame,
|
|
14
|
+
LLMContextFrame,
|
|
14
15
|
LLMFullResponseEndFrame,
|
|
15
16
|
LLMFullResponseStartFrame,
|
|
16
17
|
LLMMessagesFrame,
|
|
@@ -79,10 +80,13 @@ class LLMLogObserver(BaseObserver):
|
|
|
79
80
|
f"🧠 {arrow} {dst} LLM MESSAGES FRAME: {frame.messages} at {time_sec:.2f}s"
|
|
80
81
|
)
|
|
81
82
|
# Log OpenAILLMContextFrame (input)
|
|
82
|
-
elif isinstance(frame, OpenAILLMContextFrame):
|
|
83
|
-
|
|
84
|
-
|
|
83
|
+
elif isinstance(frame, (LLMContextFrame, OpenAILLMContextFrame)):
|
|
84
|
+
messages = (
|
|
85
|
+
frame.context.messages
|
|
86
|
+
if isinstance(frame, OpenAILLMContextFrame)
|
|
87
|
+
else frame.context.get_messages()
|
|
85
88
|
)
|
|
89
|
+
logger.debug(f"🧠 {arrow} {dst} LLM CONTEXT FRAME: {messages} at {time_sec:.2f}s")
|
|
86
90
|
# Log function call result (input)
|
|
87
91
|
elif isinstance(frame, FunctionCallResultFrame):
|
|
88
92
|
logger.debug(
|
|
@@ -61,17 +61,29 @@ class UserBotLatencyLogObserver(BaseObserver):
|
|
|
61
61
|
elif isinstance(data.frame, UserStoppedSpeakingFrame):
|
|
62
62
|
self._user_stopped_time = time.time()
|
|
63
63
|
elif isinstance(data.frame, (EndFrame, CancelFrame)):
|
|
64
|
-
|
|
65
|
-
avg_latency = mean(self._latencies)
|
|
66
|
-
min_latency = min(self._latencies)
|
|
67
|
-
max_latency = max(self._latencies)
|
|
68
|
-
logger.info(
|
|
69
|
-
f"⏱️ LATENCY FROM USER STOPPED SPEAKING TO BOT STARTED SPEAKING - Avg: {avg_latency:.3f}s, Min: {min_latency:.3f}s, Max: {max_latency:.3f}s"
|
|
70
|
-
)
|
|
64
|
+
self._log_summary()
|
|
71
65
|
elif isinstance(data.frame, BotStartedSpeakingFrame) and self._user_stopped_time:
|
|
72
66
|
latency = time.time() - self._user_stopped_time
|
|
73
67
|
self._user_stopped_time = 0
|
|
74
68
|
self._latencies.append(latency)
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
69
|
+
self._log_latency(latency)
|
|
70
|
+
|
|
71
|
+
def _log_summary(self):
|
|
72
|
+
if not self._latencies:
|
|
73
|
+
return
|
|
74
|
+
avg_latency = mean(self._latencies)
|
|
75
|
+
min_latency = min(self._latencies)
|
|
76
|
+
max_latency = max(self._latencies)
|
|
77
|
+
logger.info(
|
|
78
|
+
f"⏱️ LATENCY FROM USER STOPPED SPEAKING TO BOT STARTED SPEAKING - Avg: {avg_latency:.3f}s, Min: {min_latency:.3f}s, Max: {max_latency:.3f}s"
|
|
79
|
+
)
|
|
80
|
+
|
|
81
|
+
def _log_latency(self, latency: float):
|
|
82
|
+
"""Log the latency.
|
|
83
|
+
|
|
84
|
+
Args:
|
|
85
|
+
latency: The latency to log.
|
|
86
|
+
"""
|
|
87
|
+
logger.debug(
|
|
88
|
+
f"⏱️ LATENCY FROM USER STOPPED SPEAKING TO BOT STARTED SPEAKING: {latency:.3f}s"
|
|
89
|
+
)
|