dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
- pipecat/audio/vad/silero.py +9 -3
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +277 -86
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +18 -6
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +125 -79
- pipecat/pipeline/tts_switcher.py +30 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_context.py +40 -2
- pipecat/processors/aggregators/llm_response.py +32 -15
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/dtmf_aggregator.py +174 -77
- pipecat/processors/filters/stt_mute_filter.py +17 -0
- pipecat/processors/frame_processor.py +110 -24
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +210 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +26 -5
- pipecat/processors/user_idle_processor.py +35 -11
- pipecat/runner/daily.py +59 -20
- pipecat/runner/run.py +395 -93
- pipecat/runner/types.py +6 -4
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/__init__.py +5 -1
- pipecat/serializers/asterisk.py +16 -2
- pipecat/serializers/convox.py +41 -4
- pipecat/serializers/custom.py +257 -0
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +5 -5
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/serializers/vi.py +324 -0
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/assemblyai/models.py +6 -0
- pipecat/services/assemblyai/stt.py +13 -5
- pipecat/services/asyncai/tts.py +5 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +147 -105
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +436 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1265 -0
- pipecat/services/aws/stt.py +3 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +8 -354
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/llm.py +51 -1
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/stt.py +77 -70
- pipecat/services/cartesia/tts.py +80 -13
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +640 -0
- pipecat/services/elevenlabs/__init__.py +4 -1
- pipecat/services/elevenlabs/stt.py +339 -0
- pipecat/services/elevenlabs/tts.py +87 -46
- pipecat/services/fish/tts.py +5 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/stt.py +4 -0
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +4 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +5 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +49 -10
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/piper/tts.py +7 -9
- pipecat/services/playht/tts.py +34 -4
- pipecat/services/rime/tts.py +12 -12
- pipecat/services/riva/stt.py +3 -1
- pipecat/services/salesforce/__init__.py +9 -0
- pipecat/services/salesforce/llm.py +700 -0
- pipecat/services/sarvam/__init__.py +7 -0
- pipecat/services/sarvam/stt.py +540 -0
- pipecat/services/sarvam/tts.py +97 -13
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +22 -10
- pipecat/services/stt_service.py +47 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +75 -22
- pipecat/services/vision_service.py +7 -6
- pipecat/services/vistaar/llm.py +51 -9
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +13 -34
- pipecat/transports/base_output.py +140 -104
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +103 -19
- pipecat/transports/smallwebrtc/request_handler.py +246 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/redis.py +58 -0
- pipecat/utils/string.py +13 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- pipecat/serializers/genesys.py +0 -95
- pipecat/services/google/test-google-chirp.py +0 -45
- pipecat/services/openai.py +0 -698
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
|
@@ -13,6 +13,7 @@ and frame observation for the RTVI protocol.
|
|
|
13
13
|
|
|
14
14
|
import asyncio
|
|
15
15
|
import base64
|
|
16
|
+
import time
|
|
16
17
|
from dataclasses import dataclass
|
|
17
18
|
from typing import (
|
|
18
19
|
Any,
|
|
@@ -29,8 +30,8 @@ from typing import (
|
|
|
29
30
|
from loguru import logger
|
|
30
31
|
from pydantic import BaseModel, Field, PrivateAttr, ValidationError
|
|
31
32
|
|
|
33
|
+
from pipecat.audio.utils import calculate_audio_volume
|
|
32
34
|
from pipecat.frames.frames import (
|
|
33
|
-
BotInterruptionFrame,
|
|
34
35
|
BotStartedSpeakingFrame,
|
|
35
36
|
BotStoppedSpeakingFrame,
|
|
36
37
|
CancelFrame,
|
|
@@ -41,17 +42,20 @@ from pipecat.frames.frames import (
|
|
|
41
42
|
Frame,
|
|
42
43
|
FunctionCallResultFrame,
|
|
43
44
|
InputAudioRawFrame,
|
|
45
|
+
InputTransportMessageFrame,
|
|
44
46
|
InterimTranscriptionFrame,
|
|
47
|
+
LLMConfigureOutputFrame,
|
|
45
48
|
LLMContextFrame,
|
|
46
49
|
LLMFullResponseEndFrame,
|
|
47
50
|
LLMFullResponseStartFrame,
|
|
48
51
|
LLMMessagesAppendFrame,
|
|
49
52
|
LLMTextFrame,
|
|
50
53
|
MetricsFrame,
|
|
54
|
+
OutputTransportMessageUrgentFrame,
|
|
51
55
|
StartFrame,
|
|
52
56
|
SystemFrame,
|
|
53
57
|
TranscriptionFrame,
|
|
54
|
-
|
|
58
|
+
TTSAudioRawFrame,
|
|
55
59
|
TTSStartedFrame,
|
|
56
60
|
TTSStoppedFrame,
|
|
57
61
|
TTSTextFrame,
|
|
@@ -587,10 +591,35 @@ class RTVILLMFunctionCallMessage(BaseModel):
|
|
|
587
591
|
data: RTVILLMFunctionCallMessageData
|
|
588
592
|
|
|
589
593
|
|
|
594
|
+
class RTVISendTextOptions(BaseModel):
|
|
595
|
+
"""Options for sending text input to the LLM.
|
|
596
|
+
|
|
597
|
+
Contains options for how the pipeline should process the text input.
|
|
598
|
+
"""
|
|
599
|
+
|
|
600
|
+
run_immediately: bool = True
|
|
601
|
+
audio_response: bool = True
|
|
602
|
+
|
|
603
|
+
|
|
604
|
+
class RTVISendTextData(BaseModel):
|
|
605
|
+
"""Data format for sending text input to the LLM.
|
|
606
|
+
|
|
607
|
+
Contains the text content to send and any options for how the pipeline should process it.
|
|
608
|
+
|
|
609
|
+
"""
|
|
610
|
+
|
|
611
|
+
content: str
|
|
612
|
+
options: Optional[RTVISendTextOptions] = None
|
|
613
|
+
|
|
614
|
+
|
|
590
615
|
class RTVIAppendToContextData(BaseModel):
|
|
591
616
|
"""Data format for appending messages to the context.
|
|
592
617
|
|
|
593
618
|
Contains the role, content, and whether to run the message immediately.
|
|
619
|
+
|
|
620
|
+
.. deprecated:: 0.0.85
|
|
621
|
+
The RTVI message, append-to-context, has been deprecated. Use send-text
|
|
622
|
+
or custom client and server messages instead.
|
|
594
623
|
"""
|
|
595
624
|
|
|
596
625
|
role: Literal["user", "assistant"] | str
|
|
@@ -814,6 +843,36 @@ class RTVIServerMessage(BaseModel):
|
|
|
814
843
|
data: Any
|
|
815
844
|
|
|
816
845
|
|
|
846
|
+
class RTVIAudioLevelMessageData(BaseModel):
|
|
847
|
+
"""Data format for sending audio levels."""
|
|
848
|
+
|
|
849
|
+
value: float
|
|
850
|
+
|
|
851
|
+
|
|
852
|
+
class RTVIUserAudioLevelMessage(BaseModel):
|
|
853
|
+
"""Message indicating user audio level."""
|
|
854
|
+
|
|
855
|
+
label: RTVIMessageLiteral = RTVI_MESSAGE_LABEL
|
|
856
|
+
type: Literal["user-audio-level"] = "user-audio-level"
|
|
857
|
+
data: RTVIAudioLevelMessageData
|
|
858
|
+
|
|
859
|
+
|
|
860
|
+
class RTVIBotAudioLevelMessage(BaseModel):
|
|
861
|
+
"""Message indicating bot audio level."""
|
|
862
|
+
|
|
863
|
+
label: RTVIMessageLiteral = RTVI_MESSAGE_LABEL
|
|
864
|
+
type: Literal["bot-audio-level"] = "bot-audio-level"
|
|
865
|
+
data: RTVIAudioLevelMessageData
|
|
866
|
+
|
|
867
|
+
|
|
868
|
+
class RTVISystemLogMessage(BaseModel):
|
|
869
|
+
"""Message including a system log."""
|
|
870
|
+
|
|
871
|
+
label: RTVIMessageLiteral = RTVI_MESSAGE_LABEL
|
|
872
|
+
type: Literal["system-log"] = "system-log"
|
|
873
|
+
data: RTVITextMessageData
|
|
874
|
+
|
|
875
|
+
|
|
817
876
|
@dataclass
|
|
818
877
|
class RTVIServerMessageFrame(SystemFrame):
|
|
819
878
|
"""A frame for sending server messages to the client.
|
|
@@ -833,25 +892,36 @@ class RTVIServerMessageFrame(SystemFrame):
|
|
|
833
892
|
class RTVIObserverParams:
|
|
834
893
|
"""Parameters for configuring RTVI Observer behavior.
|
|
835
894
|
|
|
895
|
+
.. deprecated:: 0.0.87
|
|
896
|
+
Parameter `errors_enabled` is deprecated. Error messages are always enabled.
|
|
897
|
+
|
|
836
898
|
Parameters:
|
|
837
899
|
bot_llm_enabled: Indicates if the bot's LLM messages should be sent.
|
|
838
900
|
bot_tts_enabled: Indicates if the bot's TTS messages should be sent.
|
|
839
901
|
bot_speaking_enabled: Indicates if the bot's started/stopped speaking messages should be sent.
|
|
902
|
+
bot_audio_level_enabled: Indicates if bot's audio level messages should be sent.
|
|
840
903
|
user_llm_enabled: Indicates if the user's LLM input messages should be sent.
|
|
841
904
|
user_speaking_enabled: Indicates if the user's started/stopped speaking messages should be sent.
|
|
842
905
|
user_transcription_enabled: Indicates if user's transcription messages should be sent.
|
|
906
|
+
user_audio_level_enabled: Indicates if user's audio level messages should be sent.
|
|
843
907
|
metrics_enabled: Indicates if metrics messages should be sent.
|
|
844
|
-
|
|
908
|
+
system_logs_enabled: Indicates if system logs should be sent.
|
|
909
|
+
errors_enabled: [Deprecated] Indicates if errors messages should be sent.
|
|
910
|
+
audio_level_period_secs: How often audio levels should be sent if enabled.
|
|
845
911
|
"""
|
|
846
912
|
|
|
847
913
|
bot_llm_enabled: bool = True
|
|
848
914
|
bot_tts_enabled: bool = True
|
|
849
915
|
bot_speaking_enabled: bool = True
|
|
916
|
+
bot_audio_level_enabled: bool = False
|
|
850
917
|
user_llm_enabled: bool = True
|
|
851
918
|
user_speaking_enabled: bool = True
|
|
852
919
|
user_transcription_enabled: bool = True
|
|
920
|
+
user_audio_level_enabled: bool = False
|
|
853
921
|
metrics_enabled: bool = True
|
|
854
|
-
|
|
922
|
+
system_logs_enabled: bool = False
|
|
923
|
+
errors_enabled: Optional[bool] = None
|
|
924
|
+
audio_level_period_secs: float = 0.15
|
|
855
925
|
|
|
856
926
|
|
|
857
927
|
class RTVIObserver(BaseObserver):
|
|
@@ -867,7 +937,11 @@ class RTVIObserver(BaseObserver):
|
|
|
867
937
|
"""
|
|
868
938
|
|
|
869
939
|
def __init__(
|
|
870
|
-
self,
|
|
940
|
+
self,
|
|
941
|
+
rtvi: Optional["RTVIProcessor"] = None,
|
|
942
|
+
*,
|
|
943
|
+
params: Optional[RTVIObserverParams] = None,
|
|
944
|
+
**kwargs,
|
|
871
945
|
):
|
|
872
946
|
"""Initialize the RTVI observer.
|
|
873
947
|
|
|
@@ -879,9 +953,50 @@ class RTVIObserver(BaseObserver):
|
|
|
879
953
|
super().__init__(**kwargs)
|
|
880
954
|
self._rtvi = rtvi
|
|
881
955
|
self._params = params or RTVIObserverParams()
|
|
882
|
-
|
|
956
|
+
|
|
883
957
|
self._frames_seen = set()
|
|
884
|
-
|
|
958
|
+
|
|
959
|
+
self._bot_transcription = ""
|
|
960
|
+
self._last_user_audio_level = 0
|
|
961
|
+
self._last_bot_audio_level = 0
|
|
962
|
+
|
|
963
|
+
if self._params.system_logs_enabled:
|
|
964
|
+
self._system_logger_id = logger.add(self._logger_sink)
|
|
965
|
+
|
|
966
|
+
if self._params.errors_enabled is not None:
|
|
967
|
+
import warnings
|
|
968
|
+
|
|
969
|
+
with warnings.catch_warnings():
|
|
970
|
+
warnings.simplefilter("always")
|
|
971
|
+
warnings.warn(
|
|
972
|
+
"Parameter `errors_enabled` is deprecated. Error messages are always enabled.",
|
|
973
|
+
DeprecationWarning,
|
|
974
|
+
)
|
|
975
|
+
|
|
976
|
+
async def _logger_sink(self, message):
|
|
977
|
+
"""Logger sink so we can send system logs to RTVI clients."""
|
|
978
|
+
message = RTVISystemLogMessage(data=RTVITextMessageData(text=message))
|
|
979
|
+
await self.send_rtvi_message(message)
|
|
980
|
+
|
|
981
|
+
async def cleanup(self):
|
|
982
|
+
"""Cleanup RTVI observer resources."""
|
|
983
|
+
await super().cleanup()
|
|
984
|
+
if self._params.system_logs_enabled:
|
|
985
|
+
logger.remove(self._system_logger_id)
|
|
986
|
+
|
|
987
|
+
async def send_rtvi_message(self, model: BaseModel, exclude_none: bool = True):
|
|
988
|
+
"""Send an RTVI message.
|
|
989
|
+
|
|
990
|
+
By default, we push a transport frame. But this function can be
|
|
991
|
+
overridden by a subclass to send RTVI messages in different ways.
|
|
992
|
+
|
|
993
|
+
Args:
|
|
994
|
+
model: The message to send.
|
|
995
|
+
exclude_none: Whether to exclude None values from the model dump.
|
|
996
|
+
|
|
997
|
+
"""
|
|
998
|
+
if self._rtvi:
|
|
999
|
+
await self._rtvi.push_transport_message(model, exclude_none)
|
|
885
1000
|
|
|
886
1001
|
async def on_push_frame(self, data: FramePushed):
|
|
887
1002
|
"""Process a frame being pushed through the pipeline.
|
|
@@ -903,6 +1018,7 @@ class RTVIObserver(BaseObserver):
|
|
|
903
1018
|
|
|
904
1019
|
if (
|
|
905
1020
|
isinstance(frame, (UserStartedSpeakingFrame, UserStoppedSpeakingFrame))
|
|
1021
|
+
and (direction == FrameDirection.DOWNSTREAM)
|
|
906
1022
|
and self._params.user_speaking_enabled
|
|
907
1023
|
):
|
|
908
1024
|
await self._handle_interruptions(frame)
|
|
@@ -923,52 +1039,58 @@ class RTVIObserver(BaseObserver):
|
|
|
923
1039
|
):
|
|
924
1040
|
await self._handle_context(frame)
|
|
925
1041
|
elif isinstance(frame, LLMFullResponseStartFrame) and self._params.bot_llm_enabled:
|
|
926
|
-
await self.
|
|
1042
|
+
await self.send_rtvi_message(RTVIBotLLMStartedMessage())
|
|
927
1043
|
elif isinstance(frame, LLMFullResponseEndFrame) and self._params.bot_llm_enabled:
|
|
928
|
-
await self.
|
|
1044
|
+
await self.send_rtvi_message(RTVIBotLLMStoppedMessage())
|
|
929
1045
|
elif isinstance(frame, LLMTextFrame) and self._params.bot_llm_enabled:
|
|
930
1046
|
await self._handle_llm_text_frame(frame)
|
|
931
1047
|
elif isinstance(frame, TTSStartedFrame) and self._params.bot_tts_enabled:
|
|
932
|
-
await self.
|
|
1048
|
+
await self.send_rtvi_message(RTVIBotTTSStartedMessage())
|
|
933
1049
|
elif isinstance(frame, TTSStoppedFrame) and self._params.bot_tts_enabled:
|
|
934
|
-
await self.
|
|
1050
|
+
await self.send_rtvi_message(RTVIBotTTSStoppedMessage())
|
|
935
1051
|
elif isinstance(frame, TTSTextFrame) and self._params.bot_tts_enabled:
|
|
936
1052
|
if isinstance(src, BaseOutputTransport):
|
|
937
1053
|
message = RTVIBotTTSTextMessage(data=RTVITextMessageData(text=frame.text))
|
|
938
|
-
await self.
|
|
1054
|
+
await self.send_rtvi_message(message)
|
|
939
1055
|
else:
|
|
940
1056
|
mark_as_seen = False
|
|
941
1057
|
elif isinstance(frame, MetricsFrame) and self._params.metrics_enabled:
|
|
942
1058
|
await self._handle_metrics(frame)
|
|
943
1059
|
elif isinstance(frame, RTVIServerMessageFrame):
|
|
944
1060
|
message = RTVIServerMessage(data=frame.data)
|
|
945
|
-
await self.
|
|
1061
|
+
await self.send_rtvi_message(message)
|
|
946
1062
|
elif isinstance(frame, RTVIServerResponseFrame):
|
|
947
1063
|
if frame.error is not None:
|
|
948
1064
|
await self._send_error_response(frame)
|
|
949
1065
|
else:
|
|
950
1066
|
await self._send_server_response(frame)
|
|
1067
|
+
elif isinstance(frame, InputAudioRawFrame) and self._params.user_audio_level_enabled:
|
|
1068
|
+
curr_time = time.time()
|
|
1069
|
+
diff_time = curr_time - self._last_user_audio_level
|
|
1070
|
+
if diff_time > self._params.audio_level_period_secs:
|
|
1071
|
+
level = calculate_audio_volume(frame.audio, frame.sample_rate)
|
|
1072
|
+
message = RTVIUserAudioLevelMessage(data=RTVIAudioLevelMessageData(value=level))
|
|
1073
|
+
await self.send_rtvi_message(message)
|
|
1074
|
+
self._last_user_audio_level = curr_time
|
|
1075
|
+
elif isinstance(frame, TTSAudioRawFrame) and self._params.bot_audio_level_enabled:
|
|
1076
|
+
curr_time = time.time()
|
|
1077
|
+
diff_time = curr_time - self._last_bot_audio_level
|
|
1078
|
+
if diff_time > self._params.audio_level_period_secs:
|
|
1079
|
+
level = calculate_audio_volume(frame.audio, frame.sample_rate)
|
|
1080
|
+
message = RTVIBotAudioLevelMessage(data=RTVIAudioLevelMessageData(value=level))
|
|
1081
|
+
await self.send_rtvi_message(message)
|
|
1082
|
+
self._last_bot_audio_level = curr_time
|
|
951
1083
|
|
|
952
1084
|
if mark_as_seen:
|
|
953
1085
|
self._frames_seen.add(frame.id)
|
|
954
1086
|
|
|
955
|
-
async def push_transport_message_urgent(self, model: BaseModel, exclude_none: bool = True):
|
|
956
|
-
"""Push an urgent transport message to the RTVI processor.
|
|
957
|
-
|
|
958
|
-
Args:
|
|
959
|
-
model: The message model to send.
|
|
960
|
-
exclude_none: Whether to exclude None values from the model dump.
|
|
961
|
-
"""
|
|
962
|
-
frame = TransportMessageUrgentFrame(message=model.model_dump(exclude_none=exclude_none))
|
|
963
|
-
await self._rtvi.push_frame(frame)
|
|
964
|
-
|
|
965
1087
|
async def _push_bot_transcription(self):
|
|
966
1088
|
"""Push accumulated bot transcription as a message."""
|
|
967
1089
|
if len(self._bot_transcription) > 0:
|
|
968
1090
|
message = RTVIBotTranscriptionMessage(
|
|
969
1091
|
data=RTVITextMessageData(text=self._bot_transcription)
|
|
970
1092
|
)
|
|
971
|
-
await self.
|
|
1093
|
+
await self.send_rtvi_message(message)
|
|
972
1094
|
self._bot_transcription = ""
|
|
973
1095
|
|
|
974
1096
|
async def _handle_interruptions(self, frame: Frame):
|
|
@@ -980,7 +1102,7 @@ class RTVIObserver(BaseObserver):
|
|
|
980
1102
|
message = RTVIUserStoppedSpeakingMessage()
|
|
981
1103
|
|
|
982
1104
|
if message:
|
|
983
|
-
await self.
|
|
1105
|
+
await self.send_rtvi_message(message)
|
|
984
1106
|
|
|
985
1107
|
async def _handle_bot_speaking(self, frame: Frame):
|
|
986
1108
|
"""Handle bot speaking event frames."""
|
|
@@ -991,12 +1113,12 @@ class RTVIObserver(BaseObserver):
|
|
|
991
1113
|
message = RTVIBotStoppedSpeakingMessage()
|
|
992
1114
|
|
|
993
1115
|
if message:
|
|
994
|
-
await self.
|
|
1116
|
+
await self.send_rtvi_message(message)
|
|
995
1117
|
|
|
996
1118
|
async def _handle_llm_text_frame(self, frame: LLMTextFrame):
|
|
997
1119
|
"""Handle LLM text output frames."""
|
|
998
1120
|
message = RTVIBotLLMTextMessage(data=RTVITextMessageData(text=frame.text))
|
|
999
|
-
await self.
|
|
1121
|
+
await self.send_rtvi_message(message)
|
|
1000
1122
|
|
|
1001
1123
|
self._bot_transcription += frame.text
|
|
1002
1124
|
if match_endofsentence(self._bot_transcription):
|
|
@@ -1019,7 +1141,7 @@ class RTVIObserver(BaseObserver):
|
|
|
1019
1141
|
)
|
|
1020
1142
|
|
|
1021
1143
|
if message:
|
|
1022
|
-
await self.
|
|
1144
|
+
await self.send_rtvi_message(message)
|
|
1023
1145
|
|
|
1024
1146
|
async def _handle_context(self, frame: OpenAILLMContextFrame | LLMContextFrame):
|
|
1025
1147
|
"""Process LLM context frames to extract user messages for the RTVI client."""
|
|
@@ -1039,7 +1161,7 @@ class RTVIObserver(BaseObserver):
|
|
|
1039
1161
|
text = "".join(part.text for part in message.parts if hasattr(part, "text"))
|
|
1040
1162
|
if text:
|
|
1041
1163
|
rtvi_message = RTVIUserLLMTextMessage(data=RTVITextMessageData(text=text))
|
|
1042
|
-
await self.
|
|
1164
|
+
await self.send_rtvi_message(rtvi_message)
|
|
1043
1165
|
|
|
1044
1166
|
# Handle OpenAI format (original implementation)
|
|
1045
1167
|
elif isinstance(message, dict):
|
|
@@ -1050,7 +1172,7 @@ class RTVIObserver(BaseObserver):
|
|
|
1050
1172
|
else:
|
|
1051
1173
|
text = content
|
|
1052
1174
|
rtvi_message = RTVIUserLLMTextMessage(data=RTVITextMessageData(text=text))
|
|
1053
|
-
await self.
|
|
1175
|
+
await self.send_rtvi_message(rtvi_message)
|
|
1054
1176
|
|
|
1055
1177
|
except Exception as e:
|
|
1056
1178
|
logger.warning(f"Caught an error while trying to handle context: {e}")
|
|
@@ -1077,7 +1199,7 @@ class RTVIObserver(BaseObserver):
|
|
|
1077
1199
|
metrics["characters"].append(d.model_dump(exclude_none=True))
|
|
1078
1200
|
|
|
1079
1201
|
message = RTVIMetricsMessage(data=metrics)
|
|
1080
|
-
await self.
|
|
1202
|
+
await self.send_rtvi_message(message)
|
|
1081
1203
|
|
|
1082
1204
|
async def _send_server_response(self, frame: RTVIServerResponseFrame):
|
|
1083
1205
|
"""Send a response to the client for a specific request."""
|
|
@@ -1085,15 +1207,14 @@ class RTVIObserver(BaseObserver):
|
|
|
1085
1207
|
id=str(frame.client_msg.msg_id),
|
|
1086
1208
|
data=RTVIRawServerResponseData(t=frame.client_msg.type, d=frame.data),
|
|
1087
1209
|
)
|
|
1088
|
-
await self.
|
|
1210
|
+
await self.send_rtvi_message(message)
|
|
1089
1211
|
|
|
1090
1212
|
async def _send_error_response(self, frame: RTVIServerResponseFrame):
|
|
1091
1213
|
"""Send a response to the client for a specific request."""
|
|
1092
|
-
|
|
1093
|
-
|
|
1094
|
-
|
|
1095
|
-
|
|
1096
|
-
await self.push_transport_message_urgent(message)
|
|
1214
|
+
message = RTVIErrorResponse(
|
|
1215
|
+
id=str(frame.client_msg.msg_id), data=RTVIErrorResponseData(error=frame.error)
|
|
1216
|
+
)
|
|
1217
|
+
await self.send_rtvi_message(message)
|
|
1097
1218
|
|
|
1098
1219
|
|
|
1099
1220
|
class RTVIProcessor(FrameProcessor):
|
|
@@ -1127,7 +1248,7 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1127
1248
|
# Default to 0.3.0 which is the last version before actually having a
|
|
1128
1249
|
# "client-version".
|
|
1129
1250
|
self._client_version = [0, 3, 0]
|
|
1130
|
-
self.
|
|
1251
|
+
self._skip_tts: bool = False # Keep in sync with llm_service.py
|
|
1131
1252
|
|
|
1132
1253
|
self._registered_actions: Dict[str, RTVIAction] = {}
|
|
1133
1254
|
self._registered_services: Dict[str, RTVIService] = {}
|
|
@@ -1196,17 +1317,9 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1196
1317
|
await self._update_config(self._config, False)
|
|
1197
1318
|
await self._send_bot_ready()
|
|
1198
1319
|
|
|
1199
|
-
def set_errors_enabled(self, enabled: bool):
|
|
1200
|
-
"""Enable or disable error message sending.
|
|
1201
|
-
|
|
1202
|
-
Args:
|
|
1203
|
-
enabled: Whether to send error messages.
|
|
1204
|
-
"""
|
|
1205
|
-
self._errors_enabled = enabled
|
|
1206
|
-
|
|
1207
1320
|
async def interrupt_bot(self):
|
|
1208
1321
|
"""Send a bot interruption frame upstream."""
|
|
1209
|
-
await self.
|
|
1322
|
+
await self.push_interruption_task_frame_and_wait()
|
|
1210
1323
|
|
|
1211
1324
|
async def send_server_message(self, data: Any):
|
|
1212
1325
|
"""Send a server message to the client."""
|
|
@@ -1232,6 +1345,13 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1232
1345
|
"""
|
|
1233
1346
|
await self._send_error_frame(ErrorFrame(error=error))
|
|
1234
1347
|
|
|
1348
|
+
async def push_transport_message(self, model: BaseModel, exclude_none: bool = True):
|
|
1349
|
+
"""Push a transport message frame."""
|
|
1350
|
+
frame = OutputTransportMessageUrgentFrame(
|
|
1351
|
+
message=model.model_dump(exclude_none=exclude_none)
|
|
1352
|
+
)
|
|
1353
|
+
await self.push_frame(frame)
|
|
1354
|
+
|
|
1235
1355
|
async def handle_message(self, message: RTVIMessage):
|
|
1236
1356
|
"""Handle an incoming RTVI message.
|
|
1237
1357
|
|
|
@@ -1252,7 +1372,7 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1252
1372
|
args=params.arguments,
|
|
1253
1373
|
)
|
|
1254
1374
|
message = RTVILLMFunctionCallMessage(data=fn)
|
|
1255
|
-
await self.
|
|
1375
|
+
await self.push_transport_message(message, exclude_none=False)
|
|
1256
1376
|
|
|
1257
1377
|
async def handle_function_call_start(
|
|
1258
1378
|
self, function_name: str, llm: FrameProcessor, context: OpenAILLMContext
|
|
@@ -1279,7 +1399,7 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1279
1399
|
|
|
1280
1400
|
fn = RTVILLMFunctionCallStartMessageData(function_name=function_name)
|
|
1281
1401
|
message = RTVILLMFunctionCallStartMessage(data=fn)
|
|
1282
|
-
await self.
|
|
1402
|
+
await self.push_transport_message(message, exclude_none=False)
|
|
1283
1403
|
|
|
1284
1404
|
async def process_frame(self, frame: Frame, direction: FrameDirection):
|
|
1285
1405
|
"""Process incoming frames through the RTVI processor.
|
|
@@ -1302,7 +1422,7 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1302
1422
|
elif isinstance(frame, ErrorFrame):
|
|
1303
1423
|
await self._send_error_frame(frame)
|
|
1304
1424
|
await self.push_frame(frame, direction)
|
|
1305
|
-
elif isinstance(frame,
|
|
1425
|
+
elif isinstance(frame, InputTransportMessageFrame):
|
|
1306
1426
|
await self._handle_transport_message(frame)
|
|
1307
1427
|
# All other system frames
|
|
1308
1428
|
elif isinstance(frame, SystemFrame):
|
|
@@ -1316,6 +1436,9 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1316
1436
|
# Data frames
|
|
1317
1437
|
elif isinstance(frame, RTVIActionFrame):
|
|
1318
1438
|
await self._action_queue.put(frame)
|
|
1439
|
+
elif isinstance(frame, LLMConfigureOutputFrame):
|
|
1440
|
+
self._skip_tts = frame.skip_tts
|
|
1441
|
+
await self.push_frame(frame, direction)
|
|
1319
1442
|
# Other frames
|
|
1320
1443
|
else:
|
|
1321
1444
|
await self.push_frame(frame, direction)
|
|
@@ -1348,11 +1471,6 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1348
1471
|
await self.cancel_task(self._message_task)
|
|
1349
1472
|
self._message_task = None
|
|
1350
1473
|
|
|
1351
|
-
async def _push_transport_message(self, model: BaseModel, exclude_none: bool = True):
|
|
1352
|
-
"""Push a transport message frame."""
|
|
1353
|
-
frame = TransportMessageUrgentFrame(message=model.model_dump(exclude_none=exclude_none))
|
|
1354
|
-
await self.push_frame(frame)
|
|
1355
|
-
|
|
1356
1474
|
async def _action_task_handler(self):
|
|
1357
1475
|
"""Handle incoming action frames."""
|
|
1358
1476
|
while True:
|
|
@@ -1367,7 +1485,7 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1367
1485
|
await self._handle_message(message)
|
|
1368
1486
|
self._message_queue.task_done()
|
|
1369
1487
|
|
|
1370
|
-
async def _handle_transport_message(self, frame:
|
|
1488
|
+
async def _handle_transport_message(self, frame: InputTransportMessageFrame):
|
|
1371
1489
|
"""Handle an incoming transport message frame."""
|
|
1372
1490
|
try:
|
|
1373
1491
|
transport_message = frame.message
|
|
@@ -1415,7 +1533,13 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1415
1533
|
case "llm-function-call-result":
|
|
1416
1534
|
data = RTVILLMFunctionCallResultData.model_validate(message.data)
|
|
1417
1535
|
await self._handle_function_call_result(data)
|
|
1536
|
+
case "send-text":
|
|
1537
|
+
data = RTVISendTextData.model_validate(message.data)
|
|
1538
|
+
await self._handle_send_text(data)
|
|
1418
1539
|
case "append-to-context":
|
|
1540
|
+
logger.warning(
|
|
1541
|
+
f"The append-to-context message is deprecated, use send-text instead."
|
|
1542
|
+
)
|
|
1419
1543
|
data = RTVIAppendToContextData.model_validate(message.data)
|
|
1420
1544
|
await self._handle_update_context(data)
|
|
1421
1545
|
case "raw-audio" | "raw-audio-batch":
|
|
@@ -1483,7 +1607,7 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1483
1607
|
|
|
1484
1608
|
services = list(self._registered_services.values())
|
|
1485
1609
|
message = RTVIDescribeConfig(id=request_id, data=RTVIDescribeConfigData(config=services))
|
|
1486
|
-
await self._push_transport_message(message)
|
|
1610
|
+
await self.push_transport_message(message)
|
|
1487
1611
|
|
|
1488
1612
|
async def _handle_describe_actions(self, request_id: str):
|
|
1489
1613
|
"""Handle a describe-actions request."""
|
|
@@ -1498,7 +1622,7 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1498
1622
|
|
|
1499
1623
|
actions = list(self._registered_actions.values())
|
|
1500
1624
|
message = RTVIDescribeActions(id=request_id, data=RTVIDescribeActionsData(actions=actions))
|
|
1501
|
-
await self._push_transport_message(message)
|
|
1625
|
+
await self.push_transport_message(message)
|
|
1502
1626
|
|
|
1503
1627
|
async def _handle_get_config(self, request_id: str):
|
|
1504
1628
|
"""Handle a get-config request."""
|
|
@@ -1512,7 +1636,7 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1512
1636
|
)
|
|
1513
1637
|
|
|
1514
1638
|
message = RTVIConfigResponse(id=request_id, data=self._config)
|
|
1515
|
-
await self._push_transport_message(message)
|
|
1639
|
+
await self.push_transport_message(message)
|
|
1516
1640
|
|
|
1517
1641
|
def _update_config_option(self, service: str, config: RTVIServiceOptionConfig):
|
|
1518
1642
|
"""Update a specific configuration option."""
|
|
@@ -1564,6 +1688,26 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1564
1688
|
await self._update_config(RTVIConfig(config=data.config), data.interrupt)
|
|
1565
1689
|
await self._handle_get_config(request_id)
|
|
1566
1690
|
|
|
1691
|
+
async def _handle_send_text(self, data: RTVISendTextData):
|
|
1692
|
+
"""Handle a send-text message from the client."""
|
|
1693
|
+
opts = data.options if data.options is not None else RTVISendTextOptions()
|
|
1694
|
+
if opts.run_immediately:
|
|
1695
|
+
await self.interrupt_bot()
|
|
1696
|
+
cur_skip_tts = self._skip_tts
|
|
1697
|
+
should_skip_tts = not opts.audio_response
|
|
1698
|
+
toggle_skip_tts = cur_skip_tts != should_skip_tts
|
|
1699
|
+
if toggle_skip_tts:
|
|
1700
|
+
output_frame = LLMConfigureOutputFrame(skip_tts=should_skip_tts)
|
|
1701
|
+
await self.push_frame(output_frame)
|
|
1702
|
+
text_frame = LLMMessagesAppendFrame(
|
|
1703
|
+
messages=[{"role": "user", "content": data.content}],
|
|
1704
|
+
run_llm=opts.run_immediately,
|
|
1705
|
+
)
|
|
1706
|
+
await self.push_frame(text_frame)
|
|
1707
|
+
if toggle_skip_tts:
|
|
1708
|
+
output_frame = LLMConfigureOutputFrame(skip_tts=cur_skip_tts)
|
|
1709
|
+
await self.push_frame(output_frame)
|
|
1710
|
+
|
|
1567
1711
|
async def _handle_update_context(self, data: RTVIAppendToContextData):
|
|
1568
1712
|
if data.run_immediately:
|
|
1569
1713
|
await self.interrupt_bot()
|
|
@@ -1617,7 +1761,7 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1617
1761
|
# action responses (such as webhooks) don't set a request_id
|
|
1618
1762
|
if request_id:
|
|
1619
1763
|
message = RTVIActionResponse(id=request_id, data=RTVIActionResponseData(result=result))
|
|
1620
|
-
await self._push_transport_message(message)
|
|
1764
|
+
await self.push_transport_message(message)
|
|
1621
1765
|
|
|
1622
1766
|
async def _send_bot_ready(self):
|
|
1623
1767
|
"""Send the bot-ready message to the client."""
|
|
@@ -1628,23 +1772,21 @@ class RTVIProcessor(FrameProcessor):
|
|
|
1628
1772
|
id=self._client_ready_id,
|
|
1629
1773
|
data=RTVIBotReadyData(version=RTVI_PROTOCOL_VERSION, config=config),
|
|
1630
1774
|
)
|
|
1631
|
-
await self._push_transport_message(message)
|
|
1775
|
+
await self.push_transport_message(message)
|
|
1632
1776
|
|
|
1633
1777
|
async def _send_server_message(self, message: RTVIServerMessage | RTVIServerResponse):
|
|
1634
1778
|
"""Send a message or response to the client."""
|
|
1635
|
-
await self._push_transport_message(message)
|
|
1779
|
+
await self.push_transport_message(message)
|
|
1636
1780
|
|
|
1637
1781
|
async def _send_error_frame(self, frame: ErrorFrame):
|
|
1638
1782
|
"""Send an error frame as an RTVI error message."""
|
|
1639
|
-
|
|
1640
|
-
|
|
1641
|
-
await self._push_transport_message(message)
|
|
1783
|
+
message = RTVIError(data=RTVIErrorData(error=frame.error, fatal=frame.fatal))
|
|
1784
|
+
await self.push_transport_message(message)
|
|
1642
1785
|
|
|
1643
1786
|
async def _send_error_response(self, id: str, error: str):
|
|
1644
1787
|
"""Send an error response message."""
|
|
1645
|
-
|
|
1646
|
-
|
|
1647
|
-
await self._push_transport_message(message)
|
|
1788
|
+
message = RTVIErrorResponse(id=id, data=RTVIErrorResponseData(error=error))
|
|
1789
|
+
await self.push_transport_message(message)
|
|
1648
1790
|
|
|
1649
1791
|
def _action_id(self, service: str, action: str) -> str:
|
|
1650
1792
|
"""Generate an action ID from service and action names."""
|