dv-pipecat-ai 0.0.85.dev7__py3-none-any.whl → 0.0.85.dev699__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of dv-pipecat-ai might be problematic.
Files changed (158)
  1. {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/METADATA +78 -117
  2. {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/RECORD +158 -122
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +5 -0
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  11. pipecat/audio/filters/noisereduce_filter.py +15 -0
  12. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  13. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  14. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  15. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  16. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  17. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  18. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  19. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  20. pipecat/audio/vad/data/README.md +10 -0
  21. pipecat/audio/vad/vad_analyzer.py +13 -1
  22. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  23. pipecat/frames/frames.py +120 -87
  24. pipecat/observers/loggers/debug_log_observer.py +3 -3
  25. pipecat/observers/loggers/llm_log_observer.py +7 -3
  26. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  27. pipecat/pipeline/runner.py +12 -4
  28. pipecat/pipeline/service_switcher.py +64 -36
  29. pipecat/pipeline/task.py +85 -24
  30. pipecat/processors/aggregators/dtmf_aggregator.py +28 -22
  31. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  32. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  33. pipecat/processors/aggregators/llm_response.py +6 -7
  34. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  35. pipecat/processors/aggregators/user_response.py +6 -6
  36. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  37. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  38. pipecat/processors/filters/stt_mute_filter.py +2 -0
  39. pipecat/processors/frame_processor.py +103 -17
  40. pipecat/processors/frameworks/langchain.py +8 -2
  41. pipecat/processors/frameworks/rtvi.py +209 -68
  42. pipecat/processors/frameworks/strands_agents.py +170 -0
  43. pipecat/processors/logger.py +2 -2
  44. pipecat/processors/transcript_processor.py +4 -4
  45. pipecat/processors/user_idle_processor.py +3 -6
  46. pipecat/runner/run.py +270 -50
  47. pipecat/runner/types.py +2 -0
  48. pipecat/runner/utils.py +51 -10
  49. pipecat/serializers/exotel.py +5 -5
  50. pipecat/serializers/livekit.py +20 -0
  51. pipecat/serializers/plivo.py +6 -9
  52. pipecat/serializers/protobuf.py +6 -5
  53. pipecat/serializers/telnyx.py +2 -2
  54. pipecat/serializers/twilio.py +43 -23
  55. pipecat/services/ai_service.py +2 -6
  56. pipecat/services/anthropic/llm.py +2 -25
  57. pipecat/services/asyncai/tts.py +2 -3
  58. pipecat/services/aws/__init__.py +1 -0
  59. pipecat/services/aws/llm.py +122 -97
  60. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  61. pipecat/services/aws/nova_sonic/context.py +367 -0
  62. pipecat/services/aws/nova_sonic/frames.py +25 -0
  63. pipecat/services/aws/nova_sonic/llm.py +1155 -0
  64. pipecat/services/aws/stt.py +1 -3
  65. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  66. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  67. pipecat/services/aws_nova_sonic/context.py +13 -355
  68. pipecat/services/aws_nova_sonic/frames.py +13 -17
  69. pipecat/services/azure/realtime/__init__.py +0 -0
  70. pipecat/services/azure/realtime/llm.py +65 -0
  71. pipecat/services/azure/stt.py +15 -0
  72. pipecat/services/cartesia/tts.py +2 -2
  73. pipecat/services/deepgram/__init__.py +1 -0
  74. pipecat/services/deepgram/flux/__init__.py +0 -0
  75. pipecat/services/deepgram/flux/stt.py +636 -0
  76. pipecat/services/elevenlabs/__init__.py +2 -1
  77. pipecat/services/elevenlabs/stt.py +254 -276
  78. pipecat/services/elevenlabs/tts.py +5 -5
  79. pipecat/services/fish/tts.py +2 -2
  80. pipecat/services/gemini_multimodal_live/events.py +38 -524
  81. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  82. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  83. pipecat/services/gladia/stt.py +56 -72
  84. pipecat/services/google/__init__.py +1 -0
  85. pipecat/services/google/gemini_live/__init__.py +3 -0
  86. pipecat/services/google/gemini_live/file_api.py +189 -0
  87. pipecat/services/google/gemini_live/llm.py +1582 -0
  88. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  89. pipecat/services/google/llm.py +15 -11
  90. pipecat/services/google/llm_openai.py +3 -3
  91. pipecat/services/google/llm_vertex.py +86 -16
  92. pipecat/services/google/tts.py +7 -3
  93. pipecat/services/heygen/api.py +2 -0
  94. pipecat/services/heygen/client.py +8 -4
  95. pipecat/services/heygen/video.py +2 -0
  96. pipecat/services/hume/__init__.py +5 -0
  97. pipecat/services/hume/tts.py +220 -0
  98. pipecat/services/inworld/tts.py +6 -6
  99. pipecat/services/llm_service.py +15 -5
  100. pipecat/services/lmnt/tts.py +2 -2
  101. pipecat/services/mcp_service.py +4 -2
  102. pipecat/services/mem0/memory.py +6 -5
  103. pipecat/services/mistral/llm.py +29 -8
  104. pipecat/services/moondream/vision.py +42 -16
  105. pipecat/services/neuphonic/tts.py +2 -2
  106. pipecat/services/openai/__init__.py +1 -0
  107. pipecat/services/openai/base_llm.py +27 -20
  108. pipecat/services/openai/realtime/__init__.py +0 -0
  109. pipecat/services/openai/realtime/context.py +272 -0
  110. pipecat/services/openai/realtime/events.py +1106 -0
  111. pipecat/services/openai/realtime/frames.py +37 -0
  112. pipecat/services/openai/realtime/llm.py +829 -0
  113. pipecat/services/openai/tts.py +16 -8
  114. pipecat/services/openai_realtime/__init__.py +27 -0
  115. pipecat/services/openai_realtime/azure.py +21 -0
  116. pipecat/services/openai_realtime/context.py +21 -0
  117. pipecat/services/openai_realtime/events.py +21 -0
  118. pipecat/services/openai_realtime/frames.py +21 -0
  119. pipecat/services/openai_realtime_beta/azure.py +16 -0
  120. pipecat/services/openai_realtime_beta/openai.py +17 -5
  121. pipecat/services/playht/tts.py +31 -4
  122. pipecat/services/rime/tts.py +3 -4
  123. pipecat/services/salesforce/__init__.py +9 -0
  124. pipecat/services/salesforce/llm.py +465 -0
  125. pipecat/services/sarvam/tts.py +2 -6
  126. pipecat/services/simli/video.py +2 -2
  127. pipecat/services/speechmatics/stt.py +1 -7
  128. pipecat/services/stt_service.py +34 -0
  129. pipecat/services/tavus/video.py +2 -2
  130. pipecat/services/tts_service.py +9 -9
  131. pipecat/services/vision_service.py +7 -6
  132. pipecat/tests/utils.py +4 -4
  133. pipecat/transcriptions/language.py +41 -1
  134. pipecat/transports/base_input.py +17 -42
  135. pipecat/transports/base_output.py +42 -26
  136. pipecat/transports/daily/transport.py +199 -26
  137. pipecat/transports/heygen/__init__.py +0 -0
  138. pipecat/transports/heygen/transport.py +381 -0
  139. pipecat/transports/livekit/transport.py +228 -63
  140. pipecat/transports/local/audio.py +6 -1
  141. pipecat/transports/local/tk.py +11 -2
  142. pipecat/transports/network/fastapi_websocket.py +1 -1
  143. pipecat/transports/smallwebrtc/connection.py +98 -19
  144. pipecat/transports/smallwebrtc/request_handler.py +204 -0
  145. pipecat/transports/smallwebrtc/transport.py +65 -23
  146. pipecat/transports/tavus/transport.py +23 -12
  147. pipecat/transports/websocket/client.py +41 -5
  148. pipecat/transports/websocket/fastapi.py +21 -11
  149. pipecat/transports/websocket/server.py +14 -7
  150. pipecat/transports/whatsapp/api.py +8 -0
  151. pipecat/transports/whatsapp/client.py +47 -0
  152. pipecat/utils/base_object.py +54 -22
  153. pipecat/utils/string.py +12 -1
  154. pipecat/utils/tracing/service_decorators.py +21 -21
  155. {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/WHEEL +0 -0
  156. {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/licenses/LICENSE +0 -0
  157. {dv_pipecat_ai-0.0.85.dev7.dist-info → dv_pipecat_ai-0.0.85.dev699.dist-info}/top_level.txt +0 -0
  158. /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
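
The hunks below are the expanded diff for pipecat/processors/frameworks/rtvi.py (entry 41 above, +209 -68); the remaining files are not expanded here.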
@@ -13,6 +13,7 @@ and frame observation for the RTVI protocol.
 
 import asyncio
 import base64
+import time
 from dataclasses import dataclass
 from typing import (
     Any,
@@ -29,8 +30,8 @@ from typing import (
 from loguru import logger
 from pydantic import BaseModel, Field, PrivateAttr, ValidationError
 
+from pipecat.audio.utils import calculate_audio_volume
 from pipecat.frames.frames import (
-    BotInterruptionFrame,
     BotStartedSpeakingFrame,
     BotStoppedSpeakingFrame,
     CancelFrame,
@@ -41,17 +42,20 @@ from pipecat.frames.frames import (
     Frame,
     FunctionCallResultFrame,
     InputAudioRawFrame,
+    InputTransportMessageFrame,
     InterimTranscriptionFrame,
+    LLMConfigureOutputFrame,
     LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesAppendFrame,
     LLMTextFrame,
     MetricsFrame,
+    OutputTransportMessageUrgentFrame,
     StartFrame,
     SystemFrame,
     TranscriptionFrame,
-    TransportMessageUrgentFrame,
+    TTSAudioRawFrame,
     TTSStartedFrame,
     TTSStoppedFrame,
     TTSTextFrame,
@@ -587,10 +591,35 @@ class RTVILLMFunctionCallMessage(BaseModel):
     data: RTVILLMFunctionCallMessageData
 
 
+class RTVISendTextOptions(BaseModel):
+    """Options for sending text input to the LLM.
+
+    Contains options for how the pipeline should process the text input.
+    """
+
+    run_immediately: bool = True
+    audio_response: bool = True
+
+
+class RTVISendTextData(BaseModel):
+    """Data format for sending text input to the LLM.
+
+    Contains the text content to send and any options for how the pipeline should process it.
+
+    """
+
+    content: str
+    options: Optional[RTVISendTextOptions] = None
+
+
 class RTVIAppendToContextData(BaseModel):
     """Data format for appending messages to the context.
 
     Contains the role, content, and whether to run the message immediately.
+
+    .. deprecated:: 0.0.85
+        The RTVI message, append-to-context, has been deprecated. Use send-text
+        or custom client and server messages instead.
     """
 
     role: Literal["user", "assistant"] | str
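
The hunk above adds the models behind the new send-text message. A minimal sketch of how they compose, assuming the classes are imported from the module shown in this diff and serialized with pydantic's model_dump():

    # Build the payload for a send-text message on the server side: append a user
    # message, run the LLM immediately, but reply with text only (no TTS audio).
    from pipecat.processors.frameworks.rtvi import RTVISendTextData, RTVISendTextOptions

    data = RTVISendTextData(
        content="What's the weather like today?",
        options=RTVISendTextOptions(run_immediately=True, audio_response=False),
    )
    print(data.model_dump(exclude_none=True))
    # {'content': "What's the weather like today?",
    #  'options': {'run_immediately': True, 'audio_response': False}}

When options is omitted, the defaults (run immediately, with an audio response) apply.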
@@ -814,6 +843,36 @@ class RTVIServerMessage(BaseModel):
     data: Any
 
 
+class RTVIAudioLevelMessageData(BaseModel):
+    """Data format for sending audio levels."""
+
+    value: float
+
+
+class RTVIUserAudioLevelMessage(BaseModel):
+    """Message indicating user audio level."""
+
+    label: RTVIMessageLiteral = RTVI_MESSAGE_LABEL
+    type: Literal["user-audio-level"] = "user-audio-level"
+    data: RTVIAudioLevelMessageData
+
+
+class RTVIBotAudioLevelMessage(BaseModel):
+    """Message indicating bot audio level."""
+
+    label: RTVIMessageLiteral = RTVI_MESSAGE_LABEL
+    type: Literal["bot-audio-level"] = "bot-audio-level"
+    data: RTVIAudioLevelMessageData
+
+
+class RTVISystemLogMessage(BaseModel):
+    """Message including a system log."""
+
+    label: RTVIMessageLiteral = RTVI_MESSAGE_LABEL
+    type: Literal["system-log"] = "system-log"
+    data: RTVITextMessageData
+
+
 @dataclass
 class RTVIServerMessageFrame(SystemFrame):
     """A frame for sending server messages to the client.
@@ -833,25 +892,36 @@ class RTVIServerMessageFrame(SystemFrame):
 class RTVIObserverParams:
     """Parameters for configuring RTVI Observer behavior.
 
+    .. deprecated:: 0.0.87
+        Parameter `errors_enabled` is deprecated. Error messages are always enabled.
+
     Parameters:
         bot_llm_enabled: Indicates if the bot's LLM messages should be sent.
         bot_tts_enabled: Indicates if the bot's TTS messages should be sent.
         bot_speaking_enabled: Indicates if the bot's started/stopped speaking messages should be sent.
+        bot_audio_level_enabled: Indicates if bot's audio level messages should be sent.
         user_llm_enabled: Indicates if the user's LLM input messages should be sent.
        user_speaking_enabled: Indicates if the user's started/stopped speaking messages should be sent.
         user_transcription_enabled: Indicates if user's transcription messages should be sent.
+        user_audio_level_enabled: Indicates if user's audio level messages should be sent.
         metrics_enabled: Indicates if metrics messages should be sent.
-        errors_enabled: Indicates if errors messages should be sent.
+        system_logs_enabled: Indicates if system logs should be sent.
+        errors_enabled: [Deprecated] Indicates if errors messages should be sent.
+        audio_level_period_secs: How often audio levels should be sent if enabled.
     """
 
     bot_llm_enabled: bool = True
    bot_tts_enabled: bool = True
     bot_speaking_enabled: bool = True
+    bot_audio_level_enabled: bool = False
     user_llm_enabled: bool = True
     user_speaking_enabled: bool = True
     user_transcription_enabled: bool = True
+    user_audio_level_enabled: bool = False
     metrics_enabled: bool = True
-    errors_enabled: bool = True
+    system_logs_enabled: bool = False
+    errors_enabled: Optional[bool] = None
+    audio_level_period_secs: float = 0.15
 
 
 class RTVIObserver(BaseObserver):
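
A minimal sketch of wiring the new observer parameters, assuming an existing RTVIProcessor instance named rtvi (construction of the processor and the surrounding pipeline is omitted):

    from pipecat.processors.frameworks.rtvi import RTVIObserver, RTVIObserverParams

    observer = RTVIObserver(
        rtvi,
        params=RTVIObserverParams(
            user_audio_level_enabled=True,   # emit "user-audio-level" messages
            bot_audio_level_enabled=True,    # emit "bot-audio-level" messages
            audio_level_period_secs=0.25,    # throttle level updates to ~4 per second
            system_logs_enabled=True,        # forward loguru logs as "system-log" messages
        ),
    )
    # errors_enabled is no longer needed: error messages are always sent, and
    # passing it now only triggers a DeprecationWarning.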
@@ -867,7 +937,11 @@ class RTVIObserver(BaseObserver):
     """
 
     def __init__(
-        self, rtvi: "RTVIProcessor", *, params: Optional[RTVIObserverParams] = None, **kwargs
+        self,
+        rtvi: Optional["RTVIProcessor"] = None,
+        *,
+        params: Optional[RTVIObserverParams] = None,
+        **kwargs,
     ):
         """Initialize the RTVI observer.
 
@@ -879,9 +953,50 @@
         super().__init__(**kwargs)
         self._rtvi = rtvi
         self._params = params or RTVIObserverParams()
-        self._bot_transcription = ""
+
         self._frames_seen = set()
-        rtvi.set_errors_enabled(self._params.errors_enabled)
+
+        self._bot_transcription = ""
+        self._last_user_audio_level = 0
+        self._last_bot_audio_level = 0
+
+        if self._params.system_logs_enabled:
+            self._system_logger_id = logger.add(self._logger_sink)
+
+        if self._params.errors_enabled is not None:
+            import warnings
+
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "Parameter `errors_enabled` is deprecated. Error messages are always enabled.",
+                    DeprecationWarning,
+                )
+
+    async def _logger_sink(self, message):
+        """Logger sink so we cna send system logs to RTVI clients."""
+        message = RTVISystemLogMessage(data=RTVITextMessageData(text=message))
+        await self.send_rtvi_message(message)
+
+    async def cleanup(self):
+        """Cleanup RTVI observer resources."""
+        await super().cleanup()
+        if self._params.system_logs_enabled:
+            logger.remove(self._system_logger_id)
+
+    async def send_rtvi_message(self, model: BaseModel, exclude_none: bool = True):
+        """Send an RTVI message.
+
+        By default, we push a transport frame. But this function can be
+        overriden by subclass to send RTVI messages in different ways.
+
+        Args:
+            model: The message to send.
+            exclude_none: Whether to exclude None values from the model dump.
+
+        """
+        if self._rtvi:
+            await self._rtvi.push_transport_message(model, exclude_none)
 
     async def on_push_frame(self, data: FramePushed):
         """Process a frame being pushed through the pipeline.
@@ -923,52 +1038,58 @@ class RTVIObserver(BaseObserver):
         ):
             await self._handle_context(frame)
         elif isinstance(frame, LLMFullResponseStartFrame) and self._params.bot_llm_enabled:
-            await self.push_transport_message_urgent(RTVIBotLLMStartedMessage())
+            await self.send_rtvi_message(RTVIBotLLMStartedMessage())
         elif isinstance(frame, LLMFullResponseEndFrame) and self._params.bot_llm_enabled:
-            await self.push_transport_message_urgent(RTVIBotLLMStoppedMessage())
+            await self.send_rtvi_message(RTVIBotLLMStoppedMessage())
         elif isinstance(frame, LLMTextFrame) and self._params.bot_llm_enabled:
             await self._handle_llm_text_frame(frame)
         elif isinstance(frame, TTSStartedFrame) and self._params.bot_tts_enabled:
-            await self.push_transport_message_urgent(RTVIBotTTSStartedMessage())
+            await self.send_rtvi_message(RTVIBotTTSStartedMessage())
        elif isinstance(frame, TTSStoppedFrame) and self._params.bot_tts_enabled:
-            await self.push_transport_message_urgent(RTVIBotTTSStoppedMessage())
+            await self.send_rtvi_message(RTVIBotTTSStoppedMessage())
         elif isinstance(frame, TTSTextFrame) and self._params.bot_tts_enabled:
             if isinstance(src, BaseOutputTransport):
                 message = RTVIBotTTSTextMessage(data=RTVITextMessageData(text=frame.text))
-                await self.push_transport_message_urgent(message)
+                await self.send_rtvi_message(message)
             else:
                 mark_as_seen = False
         elif isinstance(frame, MetricsFrame) and self._params.metrics_enabled:
             await self._handle_metrics(frame)
         elif isinstance(frame, RTVIServerMessageFrame):
             message = RTVIServerMessage(data=frame.data)
-            await self.push_transport_message_urgent(message)
+            await self.send_rtvi_message(message)
         elif isinstance(frame, RTVIServerResponseFrame):
             if frame.error is not None:
                 await self._send_error_response(frame)
             else:
                 await self._send_server_response(frame)
+        elif isinstance(frame, InputAudioRawFrame) and self._params.user_audio_level_enabled:
+            curr_time = time.time()
+            diff_time = curr_time - self._last_user_audio_level
+            if diff_time > self._params.audio_level_period_secs:
+                level = calculate_audio_volume(frame.audio, frame.sample_rate)
+                message = RTVIUserAudioLevelMessage(data=RTVIAudioLevelMessageData(value=level))
+                await self.send_rtvi_message(message)
+                self._last_user_audio_level = curr_time
+        elif isinstance(frame, TTSAudioRawFrame) and self._params.bot_audio_level_enabled:
+            curr_time = time.time()
+            diff_time = curr_time - self._last_bot_audio_level
+            if diff_time > self._params.audio_level_period_secs:
+                level = calculate_audio_volume(frame.audio, frame.sample_rate)
+                message = RTVIBotAudioLevelMessage(data=RTVIAudioLevelMessageData(value=level))
+                await self.send_rtvi_message(message)
+                self._last_bot_audio_level = curr_time
 
         if mark_as_seen:
             self._frames_seen.add(frame.id)
 
-    async def push_transport_message_urgent(self, model: BaseModel, exclude_none: bool = True):
-        """Push an urgent transport message to the RTVI processor.
-
-        Args:
-            model: The message model to send.
-            exclude_none: Whether to exclude None values from the model dump.
-        """
-        frame = TransportMessageUrgentFrame(message=model.model_dump(exclude_none=exclude_none))
-        await self._rtvi.push_frame(frame)
-
     async def _push_bot_transcription(self):
         """Push accumulated bot transcription as a message."""
         if len(self._bot_transcription) > 0:
             message = RTVIBotTranscriptionMessage(
                 data=RTVITextMessageData(text=self._bot_transcription)
             )
-            await self.push_transport_message_urgent(message)
+            await self.send_rtvi_message(message)
             self._bot_transcription = ""
 
     async def _handle_interruptions(self, frame: Frame):
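
The hunks above replace push_transport_message_urgent with the overridable send_rtvi_message and make the rtvi processor optional. A minimal sketch of the resulting extension point, where the websocket object and its send_json() interface are hypothetical placeholders:

    from pydantic import BaseModel

    from pipecat.processors.frameworks.rtvi import RTVIObserver

    class WebsocketRTVIObserver(RTVIObserver):
        def __init__(self, websocket, **kwargs):
            super().__init__(rtvi=None, **kwargs)  # no RTVIProcessor required
            self._websocket = websocket  # hypothetical object exposing send_json()

        async def send_rtvi_message(self, model: BaseModel, exclude_none: bool = True):
            # Serialize the RTVI message and ship it over our own channel instead
            # of pushing an OutputTransportMessageUrgentFrame through an RTVIProcessor.
            await self._websocket.send_json(model.model_dump(exclude_none=exclude_none))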
@@ -980,7 +1101,7 @@ class RTVIObserver(BaseObserver):
             message = RTVIUserStoppedSpeakingMessage()
 
         if message:
-            await self.push_transport_message_urgent(message)
+            await self.send_rtvi_message(message)
 
     async def _handle_bot_speaking(self, frame: Frame):
         """Handle bot speaking event frames."""
@@ -991,12 +1112,12 @@ class RTVIObserver(BaseObserver):
             message = RTVIBotStoppedSpeakingMessage()
 
         if message:
-            await self.push_transport_message_urgent(message)
+            await self.send_rtvi_message(message)
 
     async def _handle_llm_text_frame(self, frame: LLMTextFrame):
         """Handle LLM text output frames."""
         message = RTVIBotLLMTextMessage(data=RTVITextMessageData(text=frame.text))
-        await self.push_transport_message_urgent(message)
+        await self.send_rtvi_message(message)
 
         self._bot_transcription += frame.text
         if match_endofsentence(self._bot_transcription):
@@ -1019,7 +1140,7 @@ class RTVIObserver(BaseObserver):
             )
 
         if message:
-            await self.push_transport_message_urgent(message)
+            await self.send_rtvi_message(message)
 
     async def _handle_context(self, frame: OpenAILLMContextFrame | LLMContextFrame):
         """Process LLM context frames to extract user messages for the RTVI client."""
@@ -1039,7 +1160,7 @@ class RTVIObserver(BaseObserver):
                     text = "".join(part.text for part in message.parts if hasattr(part, "text"))
                    if text:
                         rtvi_message = RTVIUserLLMTextMessage(data=RTVITextMessageData(text=text))
-                        await self.push_transport_message_urgent(rtvi_message)
+                        await self.send_rtvi_message(rtvi_message)
 
             # Handle OpenAI format (original implementation)
             elif isinstance(message, dict):
@@ -1050,7 +1171,7 @@ class RTVIObserver(BaseObserver):
                     else:
                         text = content
                     rtvi_message = RTVIUserLLMTextMessage(data=RTVITextMessageData(text=text))
-                    await self.push_transport_message_urgent(rtvi_message)
+                    await self.send_rtvi_message(rtvi_message)
 
         except Exception as e:
             logger.warning(f"Caught an error while trying to handle context: {e}")
@@ -1077,7 +1198,7 @@ class RTVIObserver(BaseObserver):
                 metrics["characters"].append(d.model_dump(exclude_none=True))
 
         message = RTVIMetricsMessage(data=metrics)
-        await self.push_transport_message_urgent(message)
+        await self.send_rtvi_message(message)
 
     async def _send_server_response(self, frame: RTVIServerResponseFrame):
         """Send a response to the client for a specific request."""
@@ -1085,15 +1206,14 @@ class RTVIObserver(BaseObserver):
             id=str(frame.client_msg.msg_id),
             data=RTVIRawServerResponseData(t=frame.client_msg.type, d=frame.data),
         )
-        await self.push_transport_message_urgent(message)
+        await self.send_rtvi_message(message)
 
     async def _send_error_response(self, frame: RTVIServerResponseFrame):
         """Send a response to the client for a specific request."""
-        if self._params.errors_enabled:
-            message = RTVIErrorResponse(
-                id=str(frame.client_msg.msg_id), data=RTVIErrorResponseData(error=frame.error)
-            )
-            await self.push_transport_message_urgent(message)
+        message = RTVIErrorResponse(
+            id=str(frame.client_msg.msg_id), data=RTVIErrorResponseData(error=frame.error)
+        )
+        await self.send_rtvi_message(message)
 
 
 class RTVIProcessor(FrameProcessor):
@@ -1127,7 +1247,7 @@ class RTVIProcessor(FrameProcessor):
         # Default to 0.3.0 which is the last version before actually having a
         # "client-version".
         self._client_version = [0, 3, 0]
-        self._errors_enabled = True
+        self._skip_tts: bool = False  # Keep in sync with llm_service.py
 
         self._registered_actions: Dict[str, RTVIAction] = {}
         self._registered_services: Dict[str, RTVIService] = {}
@@ -1196,17 +1316,9 @@ class RTVIProcessor(FrameProcessor):
         await self._update_config(self._config, False)
         await self._send_bot_ready()
 
-    def set_errors_enabled(self, enabled: bool):
-        """Enable or disable error message sending.
-
-        Args:
-            enabled: Whether to send error messages.
-        """
-        self._errors_enabled = enabled
-
     async def interrupt_bot(self):
         """Send a bot interruption frame upstream."""
-        await self.push_frame(BotInterruptionFrame(), FrameDirection.UPSTREAM)
+        await self.push_interruption_task_frame_and_wait()
 
     async def send_server_message(self, data: Any):
         """Send a server message to the client."""
@@ -1232,6 +1344,13 @@ class RTVIProcessor(FrameProcessor):
         """
         await self._send_error_frame(ErrorFrame(error=error))
 
+    async def push_transport_message(self, model: BaseModel, exclude_none: bool = True):
+        """Push a transport message frame."""
+        frame = OutputTransportMessageUrgentFrame(
+            message=model.model_dump(exclude_none=exclude_none)
+        )
+        await self.push_frame(frame)
+
     async def handle_message(self, message: RTVIMessage):
         """Handle an incoming RTVI message.
 
@@ -1252,7 +1371,7 @@ class RTVIProcessor(FrameProcessor):
             args=params.arguments,
         )
         message = RTVILLMFunctionCallMessage(data=fn)
-        await self._push_transport_message(message, exclude_none=False)
+        await self.push_transport_message(message, exclude_none=False)
 
     async def handle_function_call_start(
         self, function_name: str, llm: FrameProcessor, context: OpenAILLMContext
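
With push_transport_message now public (see the earlier hunk that adds it), application code holding an RTVIProcessor can push any pydantic model to the client as an OutputTransportMessageUrgentFrame. A minimal sketch, using RTVIServerMessage because its definition (label/type/data) appears earlier in this diff:

    from pipecat.processors.frameworks.rtvi import RTVIServerMessage

    async def notify_client(rtvi, payload: dict):
        # `rtvi` is assumed to be an RTVIProcessor already wired into a pipeline.
        await rtvi.push_transport_message(RTVIServerMessage(data=payload))

For plain payloads, the existing rtvi.send_server_message(data) helper already wraps this pattern.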
@@ -1279,7 +1398,7 @@ class RTVIProcessor(FrameProcessor):
 
         fn = RTVILLMFunctionCallStartMessageData(function_name=function_name)
         message = RTVILLMFunctionCallStartMessage(data=fn)
-        await self._push_transport_message(message, exclude_none=False)
+        await self.push_transport_message(message, exclude_none=False)
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
         """Process incoming frames through the RTVI processor.
@@ -1302,7 +1421,7 @@ class RTVIProcessor(FrameProcessor):
         elif isinstance(frame, ErrorFrame):
             await self._send_error_frame(frame)
             await self.push_frame(frame, direction)
-        elif isinstance(frame, TransportMessageUrgentFrame):
+        elif isinstance(frame, InputTransportMessageFrame):
             await self._handle_transport_message(frame)
         # All other system frames
         elif isinstance(frame, SystemFrame):
@@ -1316,6 +1435,9 @@ class RTVIProcessor(FrameProcessor):
         # Data frames
         elif isinstance(frame, RTVIActionFrame):
             await self._action_queue.put(frame)
+        elif isinstance(frame, LLMConfigureOutputFrame):
+            self._skip_tts = frame.skip_tts
+            await self.push_frame(frame, direction)
         # Other frames
         else:
             await self.push_frame(frame, direction)
@@ -1348,11 +1470,6 @@ class RTVIProcessor(FrameProcessor):
             await self.cancel_task(self._message_task)
             self._message_task = None
 
-    async def _push_transport_message(self, model: BaseModel, exclude_none: bool = True):
-        """Push a transport message frame."""
-        frame = TransportMessageUrgentFrame(message=model.model_dump(exclude_none=exclude_none))
-        await self.push_frame(frame)
-
     async def _action_task_handler(self):
         """Handle incoming action frames."""
         while True:
@@ -1367,7 +1484,7 @@ class RTVIProcessor(FrameProcessor):
             await self._handle_message(message)
             self._message_queue.task_done()
 
-    async def _handle_transport_message(self, frame: TransportMessageUrgentFrame):
+    async def _handle_transport_message(self, frame: InputTransportMessageFrame):
         """Handle an incoming transport message frame."""
         try:
             transport_message = frame.message
@@ -1415,7 +1532,13 @@ class RTVIProcessor(FrameProcessor):
             case "llm-function-call-result":
                 data = RTVILLMFunctionCallResultData.model_validate(message.data)
                 await self._handle_function_call_result(data)
+            case "send-text":
+                data = RTVISendTextData.model_validate(message.data)
+                await self._handle_send_text(data)
             case "append-to-context":
+                logger.warning(
+                    f"The append-to-context message is deprecated, use send-text instead."
+                )
                 data = RTVIAppendToContextData.model_validate(message.data)
                 await self._handle_update_context(data)
             case "raw-audio" | "raw-audio-batch":
@@ -1483,7 +1606,7 @@ class RTVIProcessor(FrameProcessor):
 
         services = list(self._registered_services.values())
         message = RTVIDescribeConfig(id=request_id, data=RTVIDescribeConfigData(config=services))
-        await self._push_transport_message(message)
+        await self.push_transport_message(message)
 
     async def _handle_describe_actions(self, request_id: str):
         """Handle a describe-actions request."""
@@ -1498,7 +1621,7 @@ class RTVIProcessor(FrameProcessor):
 
         actions = list(self._registered_actions.values())
         message = RTVIDescribeActions(id=request_id, data=RTVIDescribeActionsData(actions=actions))
-        await self._push_transport_message(message)
+        await self.push_transport_message(message)
 
     async def _handle_get_config(self, request_id: str):
         """Handle a get-config request."""
@@ -1512,7 +1635,7 @@ class RTVIProcessor(FrameProcessor):
         )
 
         message = RTVIConfigResponse(id=request_id, data=self._config)
-        await self._push_transport_message(message)
+        await self.push_transport_message(message)
 
     def _update_config_option(self, service: str, config: RTVIServiceOptionConfig):
         """Update a specific configuration option."""
@@ -1564,6 +1687,26 @@ class RTVIProcessor(FrameProcessor):
         await self._update_config(RTVIConfig(config=data.config), data.interrupt)
         await self._handle_get_config(request_id)
 
+    async def _handle_send_text(self, data: RTVISendTextData):
+        """Handle a send-text message from the client."""
+        opts = data.options if data.options is not None else RTVISendTextOptions()
+        if opts.run_immediately:
+            await self.interrupt_bot()
+        cur_skip_tts = self._skip_tts
+        should_skip_tts = not opts.audio_response
+        toggle_skip_tts = cur_skip_tts != should_skip_tts
+        if toggle_skip_tts:
+            output_frame = LLMConfigureOutputFrame(skip_tts=should_skip_tts)
+            await self.push_frame(output_frame)
+        text_frame = LLMMessagesAppendFrame(
+            messages=[{"role": "user", "content": data.content}],
+            run_llm=opts.run_immediately,
+        )
+        await self.push_frame(text_frame)
+        if toggle_skip_tts:
+            output_frame = LLMConfigureOutputFrame(skip_tts=cur_skip_tts)
+            await self.push_frame(output_frame)
+
     async def _handle_update_context(self, data: RTVIAppendToContextData):
         if data.run_immediately:
             await self.interrupt_bot()
@@ -1617,7 +1760,7 @@ class RTVIProcessor(FrameProcessor):
         # action responses (such as webhooks) don't set a request_id
         if request_id:
             message = RTVIActionResponse(id=request_id, data=RTVIActionResponseData(result=result))
-            await self._push_transport_message(message)
+            await self.push_transport_message(message)
 
     async def _send_bot_ready(self):
         """Send the bot-ready message to the client."""
@@ -1628,23 +1771,21 @@ class RTVIProcessor(FrameProcessor):
             id=self._client_ready_id,
             data=RTVIBotReadyData(version=RTVI_PROTOCOL_VERSION, config=config),
         )
-        await self._push_transport_message(message)
+        await self.push_transport_message(message)
 
     async def _send_server_message(self, message: RTVIServerMessage | RTVIServerResponse):
         """Send a message or response to the client."""
-        await self._push_transport_message(message)
+        await self.push_transport_message(message)
 
     async def _send_error_frame(self, frame: ErrorFrame):
         """Send an error frame as an RTVI error message."""
-        if self._errors_enabled:
-            message = RTVIError(data=RTVIErrorData(error=frame.error, fatal=frame.fatal))
-            await self._push_transport_message(message)
+        message = RTVIError(data=RTVIErrorData(error=frame.error, fatal=frame.fatal))
+        await self.push_transport_message(message)
 
     async def _send_error_response(self, id: str, error: str):
         """Send an error response message."""
-        if self._errors_enabled:
-            message = RTVIErrorResponse(id=id, data=RTVIErrorResponseData(error=error))
-            await self._push_transport_message(message)
+        message = RTVIErrorResponse(id=id, data=RTVIErrorResponseData(error=error))
+        await self.push_transport_message(message)
 
     def _action_id(self, service: str, action: str) -> str:
         """Generate an action ID from service and action names."""