dv-pipecat-ai 0.0.85.dev7__py3-none-any.whl → 0.0.85.dev699__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/METADATA +78 -117
- {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/RECORD +158 -122
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +5 -0
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +120 -87
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +12 -4
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +85 -24
- pipecat/processors/aggregators/dtmf_aggregator.py +28 -22
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_response.py +6 -7
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/filters/stt_mute_filter.py +2 -0
- pipecat/processors/frame_processor.py +103 -17
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +209 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +4 -4
- pipecat/processors/user_idle_processor.py +3 -6
- pipecat/runner/run.py +270 -50
- pipecat/runner/types.py +2 -0
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +6 -9
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/asyncai/tts.py +2 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +122 -97
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +367 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1155 -0
- pipecat/services/aws/stt.py +1 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +13 -355
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/tts.py +2 -2
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +636 -0
- pipecat/services/elevenlabs/__init__.py +2 -1
- pipecat/services/elevenlabs/stt.py +254 -276
- pipecat/services/elevenlabs/tts.py +5 -5
- pipecat/services/fish/tts.py +2 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +2 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +2 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +16 -8
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/playht/tts.py +31 -4
- pipecat/services/rime/tts.py +3 -4
- pipecat/services/salesforce/__init__.py +9 -0
- pipecat/services/salesforce/llm.py +465 -0
- pipecat/services/sarvam/tts.py +2 -6
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +1 -7
- pipecat/services/stt_service.py +34 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +9 -9
- pipecat/services/vision_service.py +7 -6
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +17 -42
- pipecat/transports/base_output.py +42 -26
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +98 -19
- pipecat/transports/smallwebrtc/request_handler.py +204 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/string.py +12 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/processors/frameworks/rtvi.py

@@ -13,6 +13,7 @@ and frame observation for the RTVI protocol.
 
 import asyncio
 import base64
+import time
 from dataclasses import dataclass
 from typing import (
     Any,
@@ -29,8 +30,8 @@ from typing import (
 from loguru import logger
 from pydantic import BaseModel, Field, PrivateAttr, ValidationError
 
+from pipecat.audio.utils import calculate_audio_volume
 from pipecat.frames.frames import (
-    BotInterruptionFrame,
     BotStartedSpeakingFrame,
     BotStoppedSpeakingFrame,
     CancelFrame,
@@ -41,17 +42,20 @@ from pipecat.frames.frames import (
     Frame,
     FunctionCallResultFrame,
     InputAudioRawFrame,
+    InputTransportMessageFrame,
     InterimTranscriptionFrame,
+    LLMConfigureOutputFrame,
     LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesAppendFrame,
     LLMTextFrame,
     MetricsFrame,
+    OutputTransportMessageUrgentFrame,
     StartFrame,
     SystemFrame,
     TranscriptionFrame,
-
+    TTSAudioRawFrame,
     TTSStartedFrame,
     TTSStoppedFrame,
     TTSTextFrame,
@@ -587,10 +591,35 @@ class RTVILLMFunctionCallMessage(BaseModel):
     data: RTVILLMFunctionCallMessageData
 
 
+class RTVISendTextOptions(BaseModel):
+    """Options for sending text input to the LLM.
+
+    Contains options for how the pipeline should process the text input.
+    """
+
+    run_immediately: bool = True
+    audio_response: bool = True
+
+
+class RTVISendTextData(BaseModel):
+    """Data format for sending text input to the LLM.
+
+    Contains the text content to send and any options for how the pipeline should process it.
+
+    """
+
+    content: str
+    options: Optional[RTVISendTextOptions] = None
+
+
 class RTVIAppendToContextData(BaseModel):
     """Data format for appending messages to the context.
 
     Contains the role, content, and whether to run the message immediately.
+
+    .. deprecated:: 0.0.85
+        The RTVI message, append-to-context, has been deprecated. Use send-text
+        or custom client and server messages instead.
     """
 
     role: Literal["user", "assistant"] | str
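As a rough usage sketch (illustrative, not part of the package diff): the send-text models added above are plain Pydantic models, so a caller could build and serialize them as shown below, assuming they are importable from pipecat.processors.frameworks.rtvi.

```python
# Illustrative only: exercises the RTVISendTextData / RTVISendTextOptions models
# introduced in this hunk. Assumes they are exported by the module being diffed.
from pipecat.processors.frameworks.rtvi import RTVISendTextData, RTVISendTextOptions

data = RTVISendTextData(
    content="What's the weather like today?",
    options=RTVISendTextOptions(run_immediately=True, audio_response=False),
)

# Should dump to roughly:
# {"content": "...", "options": {"run_immediately": True, "audio_response": False}}
print(data.model_dump(exclude_none=True))
```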
@@ -814,6 +843,36 @@ class RTVIServerMessage(BaseModel):
     data: Any
 
 
+class RTVIAudioLevelMessageData(BaseModel):
+    """Data format for sending audio levels."""
+
+    value: float
+
+
+class RTVIUserAudioLevelMessage(BaseModel):
+    """Message indicating user audio level."""
+
+    label: RTVIMessageLiteral = RTVI_MESSAGE_LABEL
+    type: Literal["user-audio-level"] = "user-audio-level"
+    data: RTVIAudioLevelMessageData
+
+
+class RTVIBotAudioLevelMessage(BaseModel):
+    """Message indicating bot audio level."""
+
+    label: RTVIMessageLiteral = RTVI_MESSAGE_LABEL
+    type: Literal["bot-audio-level"] = "bot-audio-level"
+    data: RTVIAudioLevelMessageData
+
+
+class RTVISystemLogMessage(BaseModel):
+    """Message including a system log."""
+
+    label: RTVIMessageLiteral = RTVI_MESSAGE_LABEL
+    type: Literal["system-log"] = "system-log"
+    data: RTVITextMessageData
+
+
 @dataclass
 class RTVIServerMessageFrame(SystemFrame):
     """A frame for sending server messages to the client.
@@ -833,25 +892,36 @@ class RTVIServerMessageFrame(SystemFrame):
 class RTVIObserverParams:
     """Parameters for configuring RTVI Observer behavior.
 
+    .. deprecated:: 0.0.87
+        Parameter `errors_enabled` is deprecated. Error messages are always enabled.
+
     Parameters:
         bot_llm_enabled: Indicates if the bot's LLM messages should be sent.
         bot_tts_enabled: Indicates if the bot's TTS messages should be sent.
         bot_speaking_enabled: Indicates if the bot's started/stopped speaking messages should be sent.
+        bot_audio_level_enabled: Indicates if bot's audio level messages should be sent.
         user_llm_enabled: Indicates if the user's LLM input messages should be sent.
         user_speaking_enabled: Indicates if the user's started/stopped speaking messages should be sent.
         user_transcription_enabled: Indicates if user's transcription messages should be sent.
+        user_audio_level_enabled: Indicates if user's audio level messages should be sent.
         metrics_enabled: Indicates if metrics messages should be sent.
-
+        system_logs_enabled: Indicates if system logs should be sent.
+        errors_enabled: [Deprecated] Indicates if errors messages should be sent.
+        audio_level_period_secs: How often audio levels should be sent if enabled.
     """
 
     bot_llm_enabled: bool = True
     bot_tts_enabled: bool = True
     bot_speaking_enabled: bool = True
+    bot_audio_level_enabled: bool = False
     user_llm_enabled: bool = True
     user_speaking_enabled: bool = True
     user_transcription_enabled: bool = True
+    user_audio_level_enabled: bool = False
     metrics_enabled: bool = True
-
+    system_logs_enabled: bool = False
+    errors_enabled: Optional[bool] = None
+    audio_level_period_secs: float = 0.15
 
 
 class RTVIObserver(BaseObserver):
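A minimal sketch of how the new observer parameters might be enabled, assuming RTVIObserver and RTVIObserverParams from this module; the specific values are arbitrary and the snippet is not part of the diff.

```python
# Illustrative only: enables the audio-level and system-log messages added in this
# release. The new flags default to False; audio_level_period_secs throttles how
# often level messages are emitted (default 0.15 seconds).
from pipecat.processors.frameworks.rtvi import RTVIObserver, RTVIObserverParams


def make_observer(rtvi) -> RTVIObserver:
    params = RTVIObserverParams(
        user_audio_level_enabled=True,
        bot_audio_level_enabled=True,
        system_logs_enabled=True,
        audio_level_period_secs=0.25,
    )
    # `params` is keyword-only in the new __init__ signature shown below.
    return RTVIObserver(rtvi, params=params)
```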
@@ -867,7 +937,11 @@ class RTVIObserver(BaseObserver):
     """
 
     def __init__(
-        self,
+        self,
+        rtvi: Optional["RTVIProcessor"] = None,
+        *,
+        params: Optional[RTVIObserverParams] = None,
+        **kwargs,
     ):
         """Initialize the RTVI observer.
 
@@ -879,9 +953,50 @@ class RTVIObserver(BaseObserver):
         super().__init__(**kwargs)
         self._rtvi = rtvi
         self._params = params or RTVIObserverParams()
-
+
         self._frames_seen = set()
-
+
+        self._bot_transcription = ""
+        self._last_user_audio_level = 0
+        self._last_bot_audio_level = 0
+
+        if self._params.system_logs_enabled:
+            self._system_logger_id = logger.add(self._logger_sink)
+
+        if self._params.errors_enabled is not None:
+            import warnings
+
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "Parameter `errors_enabled` is deprecated. Error messages are always enabled.",
+                    DeprecationWarning,
+                )
+
+    async def _logger_sink(self, message):
+        """Logger sink so we cna send system logs to RTVI clients."""
+        message = RTVISystemLogMessage(data=RTVITextMessageData(text=message))
+        await self.send_rtvi_message(message)
+
+    async def cleanup(self):
+        """Cleanup RTVI observer resources."""
+        await super().cleanup()
+        if self._params.system_logs_enabled:
+            logger.remove(self._system_logger_id)
+
+    async def send_rtvi_message(self, model: BaseModel, exclude_none: bool = True):
+        """Send an RTVI message.
+
+        By default, we push a transport frame. But this function can be
+        overriden by subclass to send RTVI messages in different ways.
+
+        Args:
+            model: The message to send.
+            exclude_none: Whether to exclude None values from the model dump.
+
+        """
+        if self._rtvi:
+            await self._rtvi.push_transport_message(model, exclude_none)
 
     async def on_push_frame(self, data: FramePushed):
         """Process a frame being pushed through the pipeline.
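The new send_rtvi_message method is the single path through which the observer emits RTVI messages, and its docstring notes that a subclass may override it to deliver messages differently. A hedged sketch of such an override follows; the logging behavior is illustrative only and not part of the diff.

```python
# Illustrative only: a subclass that logs every outgoing RTVI message before
# delegating to the default behavior (pushing a transport frame via the processor).
from loguru import logger
from pydantic import BaseModel

from pipecat.processors.frameworks.rtvi import RTVIObserver


class LoggingRTVIObserver(RTVIObserver):
    async def send_rtvi_message(self, model: BaseModel, exclude_none: bool = True):
        # Log the serialized message, then fall back to the default transport path.
        logger.debug(f"RTVI -> client: {model.model_dump(exclude_none=exclude_none)}")
        await super().send_rtvi_message(model, exclude_none)
```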
@@ -923,52 +1038,58 @@ class RTVIObserver(BaseObserver):
         ):
             await self._handle_context(frame)
         elif isinstance(frame, LLMFullResponseStartFrame) and self._params.bot_llm_enabled:
-            await self.
+            await self.send_rtvi_message(RTVIBotLLMStartedMessage())
         elif isinstance(frame, LLMFullResponseEndFrame) and self._params.bot_llm_enabled:
-            await self.
+            await self.send_rtvi_message(RTVIBotLLMStoppedMessage())
         elif isinstance(frame, LLMTextFrame) and self._params.bot_llm_enabled:
             await self._handle_llm_text_frame(frame)
         elif isinstance(frame, TTSStartedFrame) and self._params.bot_tts_enabled:
-            await self.
+            await self.send_rtvi_message(RTVIBotTTSStartedMessage())
         elif isinstance(frame, TTSStoppedFrame) and self._params.bot_tts_enabled:
-            await self.
+            await self.send_rtvi_message(RTVIBotTTSStoppedMessage())
         elif isinstance(frame, TTSTextFrame) and self._params.bot_tts_enabled:
             if isinstance(src, BaseOutputTransport):
                 message = RTVIBotTTSTextMessage(data=RTVITextMessageData(text=frame.text))
-                await self.
+                await self.send_rtvi_message(message)
             else:
                 mark_as_seen = False
         elif isinstance(frame, MetricsFrame) and self._params.metrics_enabled:
             await self._handle_metrics(frame)
         elif isinstance(frame, RTVIServerMessageFrame):
             message = RTVIServerMessage(data=frame.data)
-            await self.
+            await self.send_rtvi_message(message)
         elif isinstance(frame, RTVIServerResponseFrame):
             if frame.error is not None:
                 await self._send_error_response(frame)
             else:
                 await self._send_server_response(frame)
+        elif isinstance(frame, InputAudioRawFrame) and self._params.user_audio_level_enabled:
+            curr_time = time.time()
+            diff_time = curr_time - self._last_user_audio_level
+            if diff_time > self._params.audio_level_period_secs:
+                level = calculate_audio_volume(frame.audio, frame.sample_rate)
+                message = RTVIUserAudioLevelMessage(data=RTVIAudioLevelMessageData(value=level))
+                await self.send_rtvi_message(message)
+                self._last_user_audio_level = curr_time
+        elif isinstance(frame, TTSAudioRawFrame) and self._params.bot_audio_level_enabled:
+            curr_time = time.time()
+            diff_time = curr_time - self._last_bot_audio_level
+            if diff_time > self._params.audio_level_period_secs:
+                level = calculate_audio_volume(frame.audio, frame.sample_rate)
+                message = RTVIBotAudioLevelMessage(data=RTVIAudioLevelMessageData(value=level))
+                await self.send_rtvi_message(message)
+                self._last_bot_audio_level = curr_time
 
         if mark_as_seen:
             self._frames_seen.add(frame.id)
 
-    async def push_transport_message_urgent(self, model: BaseModel, exclude_none: bool = True):
-        """Push an urgent transport message to the RTVI processor.
-
-        Args:
-            model: The message model to send.
-            exclude_none: Whether to exclude None values from the model dump.
-        """
-        frame = TransportMessageUrgentFrame(message=model.model_dump(exclude_none=exclude_none))
-        await self._rtvi.push_frame(frame)
-
     async def _push_bot_transcription(self):
         """Push accumulated bot transcription as a message."""
         if len(self._bot_transcription) > 0:
             message = RTVIBotTranscriptionMessage(
                 data=RTVITextMessageData(text=self._bot_transcription)
             )
-            await self.
+            await self.send_rtvi_message(message)
             self._bot_transcription = ""
 
     async def _handle_interruptions(self, frame: Frame):
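For reference, a sketch of the message produced by the audio-level branches above, assuming the RTVIUserAudioLevelMessage and RTVIAudioLevelMessageData models added earlier in this diff; the level value here is made up.

```python
# Illustrative only: the observer emits a message like this at most once per
# audio_level_period_secs, with `value` computed by calculate_audio_volume().
from pipecat.processors.frameworks.rtvi import (
    RTVIAudioLevelMessageData,
    RTVIUserAudioLevelMessage,
)

message = RTVIUserAudioLevelMessage(data=RTVIAudioLevelMessageData(value=0.42))

# Dumps to a dict with the standard RTVI label, type "user-audio-level",
# and data {"value": 0.42}.
print(message.model_dump(exclude_none=True))
```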
@@ -980,7 +1101,7 @@ class RTVIObserver(BaseObserver):
             message = RTVIUserStoppedSpeakingMessage()
 
         if message:
-            await self.
+            await self.send_rtvi_message(message)
 
     async def _handle_bot_speaking(self, frame: Frame):
         """Handle bot speaking event frames."""
@@ -991,12 +1112,12 @@ class RTVIObserver(BaseObserver):
             message = RTVIBotStoppedSpeakingMessage()
 
         if message:
-            await self.
+            await self.send_rtvi_message(message)
 
     async def _handle_llm_text_frame(self, frame: LLMTextFrame):
         """Handle LLM text output frames."""
         message = RTVIBotLLMTextMessage(data=RTVITextMessageData(text=frame.text))
-        await self.
+        await self.send_rtvi_message(message)
 
         self._bot_transcription += frame.text
         if match_endofsentence(self._bot_transcription):
@@ -1019,7 +1140,7 @@ class RTVIObserver(BaseObserver):
             )
 
         if message:
-            await self.
+            await self.send_rtvi_message(message)
 
     async def _handle_context(self, frame: OpenAILLMContextFrame | LLMContextFrame):
         """Process LLM context frames to extract user messages for the RTVI client."""
@@ -1039,7 +1160,7 @@ class RTVIObserver(BaseObserver):
                 text = "".join(part.text for part in message.parts if hasattr(part, "text"))
                 if text:
                     rtvi_message = RTVIUserLLMTextMessage(data=RTVITextMessageData(text=text))
-                    await self.
+                    await self.send_rtvi_message(rtvi_message)
 
             # Handle OpenAI format (original implementation)
             elif isinstance(message, dict):
@@ -1050,7 +1171,7 @@ class RTVIObserver(BaseObserver):
                 else:
                     text = content
                 rtvi_message = RTVIUserLLMTextMessage(data=RTVITextMessageData(text=text))
-                await self.
+                await self.send_rtvi_message(rtvi_message)
 
         except Exception as e:
             logger.warning(f"Caught an error while trying to handle context: {e}")
@@ -1077,7 +1198,7 @@ class RTVIObserver(BaseObserver):
                 metrics["characters"].append(d.model_dump(exclude_none=True))
 
         message = RTVIMetricsMessage(data=metrics)
-        await self.
+        await self.send_rtvi_message(message)
 
     async def _send_server_response(self, frame: RTVIServerResponseFrame):
         """Send a response to the client for a specific request."""
@@ -1085,15 +1206,14 @@ class RTVIObserver(BaseObserver):
             id=str(frame.client_msg.msg_id),
             data=RTVIRawServerResponseData(t=frame.client_msg.type, d=frame.data),
         )
-        await self.
+        await self.send_rtvi_message(message)
 
     async def _send_error_response(self, frame: RTVIServerResponseFrame):
         """Send a response to the client for a specific request."""
-
-
-
-
-        await self.push_transport_message_urgent(message)
+        message = RTVIErrorResponse(
+            id=str(frame.client_msg.msg_id), data=RTVIErrorResponseData(error=frame.error)
+        )
+        await self.send_rtvi_message(message)
 
 
 class RTVIProcessor(FrameProcessor):
@@ -1127,7 +1247,7 @@ class RTVIProcessor(FrameProcessor):
         # Default to 0.3.0 which is the last version before actually having a
         # "client-version".
         self._client_version = [0, 3, 0]
-        self.
+        self._skip_tts: bool = False  # Keep in sync with llm_service.py
 
         self._registered_actions: Dict[str, RTVIAction] = {}
         self._registered_services: Dict[str, RTVIService] = {}
@@ -1196,17 +1316,9 @@ class RTVIProcessor(FrameProcessor):
         await self._update_config(self._config, False)
         await self._send_bot_ready()
 
-    def set_errors_enabled(self, enabled: bool):
-        """Enable or disable error message sending.
-
-        Args:
-            enabled: Whether to send error messages.
-        """
-        self._errors_enabled = enabled
-
     async def interrupt_bot(self):
         """Send a bot interruption frame upstream."""
-        await self.
+        await self.push_interruption_task_frame_and_wait()
 
     async def send_server_message(self, data: Any):
         """Send a server message to the client."""
@@ -1232,6 +1344,13 @@ class RTVIProcessor(FrameProcessor):
         """
         await self._send_error_frame(ErrorFrame(error=error))
 
+    async def push_transport_message(self, model: BaseModel, exclude_none: bool = True):
+        """Push a transport message frame."""
+        frame = OutputTransportMessageUrgentFrame(
+            message=model.model_dump(exclude_none=exclude_none)
+        )
+        await self.push_frame(frame)
+
     async def handle_message(self, message: RTVIMessage):
         """Handle an incoming RTVI message.
 
@@ -1252,7 +1371,7 @@ class RTVIProcessor(FrameProcessor):
             args=params.arguments,
         )
         message = RTVILLMFunctionCallMessage(data=fn)
-        await self.
+        await self.push_transport_message(message, exclude_none=False)
 
     async def handle_function_call_start(
         self, function_name: str, llm: FrameProcessor, context: OpenAILLMContext
@@ -1279,7 +1398,7 @@ class RTVIProcessor(FrameProcessor):
 
         fn = RTVILLMFunctionCallStartMessageData(function_name=function_name)
         message = RTVILLMFunctionCallStartMessage(data=fn)
-        await self.
+        await self.push_transport_message(message, exclude_none=False)
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
         """Process incoming frames through the RTVI processor.
@@ -1302,7 +1421,7 @@ class RTVIProcessor(FrameProcessor):
         elif isinstance(frame, ErrorFrame):
             await self._send_error_frame(frame)
             await self.push_frame(frame, direction)
-        elif isinstance(frame,
+        elif isinstance(frame, InputTransportMessageFrame):
             await self._handle_transport_message(frame)
         # All other system frames
         elif isinstance(frame, SystemFrame):
@@ -1316,6 +1435,9 @@ class RTVIProcessor(FrameProcessor):
         # Data frames
         elif isinstance(frame, RTVIActionFrame):
             await self._action_queue.put(frame)
+        elif isinstance(frame, LLMConfigureOutputFrame):
+            self._skip_tts = frame.skip_tts
+            await self.push_frame(frame, direction)
         # Other frames
         else:
             await self.push_frame(frame, direction)
@@ -1348,11 +1470,6 @@ class RTVIProcessor(FrameProcessor):
             await self.cancel_task(self._message_task)
             self._message_task = None
 
-    async def _push_transport_message(self, model: BaseModel, exclude_none: bool = True):
-        """Push a transport message frame."""
-        frame = TransportMessageUrgentFrame(message=model.model_dump(exclude_none=exclude_none))
-        await self.push_frame(frame)
-
     async def _action_task_handler(self):
         """Handle incoming action frames."""
         while True:
@@ -1367,7 +1484,7 @@ class RTVIProcessor(FrameProcessor):
             await self._handle_message(message)
             self._message_queue.task_done()
 
-    async def _handle_transport_message(self, frame:
+    async def _handle_transport_message(self, frame: InputTransportMessageFrame):
         """Handle an incoming transport message frame."""
         try:
             transport_message = frame.message
@@ -1415,7 +1532,13 @@ class RTVIProcessor(FrameProcessor):
                 case "llm-function-call-result":
                     data = RTVILLMFunctionCallResultData.model_validate(message.data)
                     await self._handle_function_call_result(data)
+                case "send-text":
+                    data = RTVISendTextData.model_validate(message.data)
+                    await self._handle_send_text(data)
                 case "append-to-context":
+                    logger.warning(
+                        f"The append-to-context message is deprecated, use send-text instead."
+                    )
                     data = RTVIAppendToContextData.model_validate(message.data)
                     await self._handle_update_context(data)
                 case "raw-audio" | "raw-audio-batch":
@@ -1483,7 +1606,7 @@ class RTVIProcessor(FrameProcessor):
 
         services = list(self._registered_services.values())
         message = RTVIDescribeConfig(id=request_id, data=RTVIDescribeConfigData(config=services))
-        await self.
+        await self.push_transport_message(message)
 
     async def _handle_describe_actions(self, request_id: str):
         """Handle a describe-actions request."""
@@ -1498,7 +1621,7 @@ class RTVIProcessor(FrameProcessor):
 
         actions = list(self._registered_actions.values())
         message = RTVIDescribeActions(id=request_id, data=RTVIDescribeActionsData(actions=actions))
-        await self.
+        await self.push_transport_message(message)
 
     async def _handle_get_config(self, request_id: str):
         """Handle a get-config request."""
@@ -1512,7 +1635,7 @@ class RTVIProcessor(FrameProcessor):
         )
 
         message = RTVIConfigResponse(id=request_id, data=self._config)
-        await self.
+        await self.push_transport_message(message)
 
     def _update_config_option(self, service: str, config: RTVIServiceOptionConfig):
         """Update a specific configuration option."""
@@ -1564,6 +1687,26 @@ class RTVIProcessor(FrameProcessor):
         await self._update_config(RTVIConfig(config=data.config), data.interrupt)
         await self._handle_get_config(request_id)
 
+    async def _handle_send_text(self, data: RTVISendTextData):
+        """Handle a send-text message from the client."""
+        opts = data.options if data.options is not None else RTVISendTextOptions()
+        if opts.run_immediately:
+            await self.interrupt_bot()
+        cur_skip_tts = self._skip_tts
+        should_skip_tts = not opts.audio_response
+        toggle_skip_tts = cur_skip_tts != should_skip_tts
+        if toggle_skip_tts:
+            output_frame = LLMConfigureOutputFrame(skip_tts=should_skip_tts)
+            await self.push_frame(output_frame)
+        text_frame = LLMMessagesAppendFrame(
+            messages=[{"role": "user", "content": data.content}],
+            run_llm=opts.run_immediately,
+        )
+        await self.push_frame(text_frame)
+        if toggle_skip_tts:
+            output_frame = LLMConfigureOutputFrame(skip_tts=cur_skip_tts)
+            await self.push_frame(output_frame)
+
     async def _handle_update_context(self, data: RTVIAppendToContextData):
         if data.run_immediately:
             await self.interrupt_bot()
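A hypothetical client payload for the new send-text path handled above. The data fields mirror RTVISendTextData and RTVISendTextOptions from this diff; the envelope fields (label, type, id) and their values are assumptions based on the existing RTVIMessage shape and are not taken from this diff.

```python
# Illustrative only: roughly what an RTVI client might send to trigger
# _handle_send_text(). The "label" and "id" values here are placeholders.
send_text_payload = {
    "label": "rtvi-ai",
    "type": "send-text",
    "id": "send-text-1",
    "data": {
        "content": "Please summarize the conversation so far.",
        "options": {
            # run_immediately interrupts the bot and runs the LLM right away;
            # audio_response=False temporarily sets skip_tts around this turn.
            "run_immediately": True,
            "audio_response": False,
        },
    },
}
```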
@@ -1617,7 +1760,7 @@ class RTVIProcessor(FrameProcessor):
         # action responses (such as webhooks) don't set a request_id
         if request_id:
             message = RTVIActionResponse(id=request_id, data=RTVIActionResponseData(result=result))
-            await self.
+            await self.push_transport_message(message)
 
     async def _send_bot_ready(self):
         """Send the bot-ready message to the client."""
@@ -1628,23 +1771,21 @@ class RTVIProcessor(FrameProcessor):
             id=self._client_ready_id,
             data=RTVIBotReadyData(version=RTVI_PROTOCOL_VERSION, config=config),
         )
-        await self.
+        await self.push_transport_message(message)
 
     async def _send_server_message(self, message: RTVIServerMessage | RTVIServerResponse):
         """Send a message or response to the client."""
-        await self.
+        await self.push_transport_message(message)
 
     async def _send_error_frame(self, frame: ErrorFrame):
         """Send an error frame as an RTVI error message."""
-
-
-        await self._push_transport_message(message)
+        message = RTVIError(data=RTVIErrorData(error=frame.error, fatal=frame.fatal))
+        await self.push_transport_message(message)
 
     async def _send_error_response(self, id: str, error: str):
         """Send an error response message."""
-
-
-        await self._push_transport_message(message)
+        message = RTVIErrorResponse(id=id, data=RTVIErrorResponseData(error=error))
+        await self.push_transport_message(message)
 
     def _action_id(self, service: str, action: str) -> str:
         """Generate an action ID from service and action names."""
|