dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic.

Files changed (195)
  1. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
  2. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/dtmf/dtmf-0.wav +0 -0
  11. pipecat/audio/dtmf/dtmf-1.wav +0 -0
  12. pipecat/audio/dtmf/dtmf-2.wav +0 -0
  13. pipecat/audio/dtmf/dtmf-3.wav +0 -0
  14. pipecat/audio/dtmf/dtmf-4.wav +0 -0
  15. pipecat/audio/dtmf/dtmf-5.wav +0 -0
  16. pipecat/audio/dtmf/dtmf-6.wav +0 -0
  17. pipecat/audio/dtmf/dtmf-7.wav +0 -0
  18. pipecat/audio/dtmf/dtmf-8.wav +0 -0
  19. pipecat/audio/dtmf/dtmf-9.wav +0 -0
  20. pipecat/audio/dtmf/dtmf-pound.wav +0 -0
  21. pipecat/audio/dtmf/dtmf-star.wav +0 -0
  22. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  23. pipecat/audio/filters/noisereduce_filter.py +15 -0
  24. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  25. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  26. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  27. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  28. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  29. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  30. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  31. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  32. pipecat/audio/vad/data/README.md +10 -0
  33. pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
  34. pipecat/audio/vad/silero.py +9 -3
  35. pipecat/audio/vad/vad_analyzer.py +13 -1
  36. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  37. pipecat/frames/frames.py +277 -86
  38. pipecat/observers/loggers/debug_log_observer.py +3 -3
  39. pipecat/observers/loggers/llm_log_observer.py +7 -3
  40. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  41. pipecat/pipeline/runner.py +18 -6
  42. pipecat/pipeline/service_switcher.py +64 -36
  43. pipecat/pipeline/task.py +125 -79
  44. pipecat/pipeline/tts_switcher.py +30 -0
  45. pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
  46. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  47. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  48. pipecat/processors/aggregators/llm_context.py +40 -2
  49. pipecat/processors/aggregators/llm_response.py +32 -15
  50. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  51. pipecat/processors/aggregators/user_response.py +6 -6
  52. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  53. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  54. pipecat/processors/dtmf_aggregator.py +174 -77
  55. pipecat/processors/filters/stt_mute_filter.py +17 -0
  56. pipecat/processors/frame_processor.py +110 -24
  57. pipecat/processors/frameworks/langchain.py +8 -2
  58. pipecat/processors/frameworks/rtvi.py +210 -68
  59. pipecat/processors/frameworks/strands_agents.py +170 -0
  60. pipecat/processors/logger.py +2 -2
  61. pipecat/processors/transcript_processor.py +26 -5
  62. pipecat/processors/user_idle_processor.py +35 -11
  63. pipecat/runner/daily.py +59 -20
  64. pipecat/runner/run.py +395 -93
  65. pipecat/runner/types.py +6 -4
  66. pipecat/runner/utils.py +51 -10
  67. pipecat/serializers/__init__.py +5 -1
  68. pipecat/serializers/asterisk.py +16 -2
  69. pipecat/serializers/convox.py +41 -4
  70. pipecat/serializers/custom.py +257 -0
  71. pipecat/serializers/exotel.py +5 -5
  72. pipecat/serializers/livekit.py +20 -0
  73. pipecat/serializers/plivo.py +5 -5
  74. pipecat/serializers/protobuf.py +6 -5
  75. pipecat/serializers/telnyx.py +2 -2
  76. pipecat/serializers/twilio.py +43 -23
  77. pipecat/serializers/vi.py +324 -0
  78. pipecat/services/ai_service.py +2 -6
  79. pipecat/services/anthropic/llm.py +2 -25
  80. pipecat/services/assemblyai/models.py +6 -0
  81. pipecat/services/assemblyai/stt.py +13 -5
  82. pipecat/services/asyncai/tts.py +5 -3
  83. pipecat/services/aws/__init__.py +1 -0
  84. pipecat/services/aws/llm.py +147 -105
  85. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  86. pipecat/services/aws/nova_sonic/context.py +436 -0
  87. pipecat/services/aws/nova_sonic/frames.py +25 -0
  88. pipecat/services/aws/nova_sonic/llm.py +1265 -0
  89. pipecat/services/aws/stt.py +3 -3
  90. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  91. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  92. pipecat/services/aws_nova_sonic/context.py +8 -354
  93. pipecat/services/aws_nova_sonic/frames.py +13 -17
  94. pipecat/services/azure/llm.py +51 -1
  95. pipecat/services/azure/realtime/__init__.py +0 -0
  96. pipecat/services/azure/realtime/llm.py +65 -0
  97. pipecat/services/azure/stt.py +15 -0
  98. pipecat/services/cartesia/stt.py +77 -70
  99. pipecat/services/cartesia/tts.py +80 -13
  100. pipecat/services/deepgram/__init__.py +1 -0
  101. pipecat/services/deepgram/flux/__init__.py +0 -0
  102. pipecat/services/deepgram/flux/stt.py +640 -0
  103. pipecat/services/elevenlabs/__init__.py +4 -1
  104. pipecat/services/elevenlabs/stt.py +339 -0
  105. pipecat/services/elevenlabs/tts.py +87 -46
  106. pipecat/services/fish/tts.py +5 -2
  107. pipecat/services/gemini_multimodal_live/events.py +38 -524
  108. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  109. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  110. pipecat/services/gladia/stt.py +56 -72
  111. pipecat/services/google/__init__.py +1 -0
  112. pipecat/services/google/gemini_live/__init__.py +3 -0
  113. pipecat/services/google/gemini_live/file_api.py +189 -0
  114. pipecat/services/google/gemini_live/llm.py +1582 -0
  115. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  116. pipecat/services/google/llm.py +15 -11
  117. pipecat/services/google/llm_openai.py +3 -3
  118. pipecat/services/google/llm_vertex.py +86 -16
  119. pipecat/services/google/stt.py +4 -0
  120. pipecat/services/google/tts.py +7 -3
  121. pipecat/services/heygen/api.py +2 -0
  122. pipecat/services/heygen/client.py +8 -4
  123. pipecat/services/heygen/video.py +2 -0
  124. pipecat/services/hume/__init__.py +5 -0
  125. pipecat/services/hume/tts.py +220 -0
  126. pipecat/services/inworld/tts.py +6 -6
  127. pipecat/services/llm_service.py +15 -5
  128. pipecat/services/lmnt/tts.py +4 -2
  129. pipecat/services/mcp_service.py +4 -2
  130. pipecat/services/mem0/memory.py +6 -5
  131. pipecat/services/mistral/llm.py +29 -8
  132. pipecat/services/moondream/vision.py +42 -16
  133. pipecat/services/neuphonic/tts.py +5 -2
  134. pipecat/services/openai/__init__.py +1 -0
  135. pipecat/services/openai/base_llm.py +27 -20
  136. pipecat/services/openai/realtime/__init__.py +0 -0
  137. pipecat/services/openai/realtime/context.py +272 -0
  138. pipecat/services/openai/realtime/events.py +1106 -0
  139. pipecat/services/openai/realtime/frames.py +37 -0
  140. pipecat/services/openai/realtime/llm.py +829 -0
  141. pipecat/services/openai/tts.py +49 -10
  142. pipecat/services/openai_realtime/__init__.py +27 -0
  143. pipecat/services/openai_realtime/azure.py +21 -0
  144. pipecat/services/openai_realtime/context.py +21 -0
  145. pipecat/services/openai_realtime/events.py +21 -0
  146. pipecat/services/openai_realtime/frames.py +21 -0
  147. pipecat/services/openai_realtime_beta/azure.py +16 -0
  148. pipecat/services/openai_realtime_beta/openai.py +17 -5
  149. pipecat/services/piper/tts.py +7 -9
  150. pipecat/services/playht/tts.py +34 -4
  151. pipecat/services/rime/tts.py +12 -12
  152. pipecat/services/riva/stt.py +3 -1
  153. pipecat/services/salesforce/__init__.py +9 -0
  154. pipecat/services/salesforce/llm.py +700 -0
  155. pipecat/services/sarvam/__init__.py +7 -0
  156. pipecat/services/sarvam/stt.py +540 -0
  157. pipecat/services/sarvam/tts.py +97 -13
  158. pipecat/services/simli/video.py +2 -2
  159. pipecat/services/speechmatics/stt.py +22 -10
  160. pipecat/services/stt_service.py +47 -0
  161. pipecat/services/tavus/video.py +2 -2
  162. pipecat/services/tts_service.py +75 -22
  163. pipecat/services/vision_service.py +7 -6
  164. pipecat/services/vistaar/llm.py +51 -9
  165. pipecat/tests/utils.py +4 -4
  166. pipecat/transcriptions/language.py +41 -1
  167. pipecat/transports/base_input.py +13 -34
  168. pipecat/transports/base_output.py +140 -104
  169. pipecat/transports/daily/transport.py +199 -26
  170. pipecat/transports/heygen/__init__.py +0 -0
  171. pipecat/transports/heygen/transport.py +381 -0
  172. pipecat/transports/livekit/transport.py +228 -63
  173. pipecat/transports/local/audio.py +6 -1
  174. pipecat/transports/local/tk.py +11 -2
  175. pipecat/transports/network/fastapi_websocket.py +1 -1
  176. pipecat/transports/smallwebrtc/connection.py +103 -19
  177. pipecat/transports/smallwebrtc/request_handler.py +246 -0
  178. pipecat/transports/smallwebrtc/transport.py +65 -23
  179. pipecat/transports/tavus/transport.py +23 -12
  180. pipecat/transports/websocket/client.py +41 -5
  181. pipecat/transports/websocket/fastapi.py +21 -11
  182. pipecat/transports/websocket/server.py +14 -7
  183. pipecat/transports/whatsapp/api.py +8 -0
  184. pipecat/transports/whatsapp/client.py +47 -0
  185. pipecat/utils/base_object.py +54 -22
  186. pipecat/utils/redis.py +58 -0
  187. pipecat/utils/string.py +13 -1
  188. pipecat/utils/tracing/service_decorators.py +21 -21
  189. pipecat/serializers/genesys.py +0 -95
  190. pipecat/services/google/test-google-chirp.py +0 -45
  191. pipecat/services/openai.py +0 -698
  192. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
  193. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
  194. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
  195. /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/processors/frameworks/rtvi.py

@@ -13,6 +13,7 @@ and frame observation for the RTVI protocol.
 
 import asyncio
 import base64
+import time
 from dataclasses import dataclass
 from typing import (
     Any,
@@ -29,8 +30,8 @@ from typing import (
 from loguru import logger
 from pydantic import BaseModel, Field, PrivateAttr, ValidationError
 
+from pipecat.audio.utils import calculate_audio_volume
 from pipecat.frames.frames import (
-    BotInterruptionFrame,
     BotStartedSpeakingFrame,
     BotStoppedSpeakingFrame,
     CancelFrame,
@@ -41,17 +42,20 @@ from pipecat.frames.frames import (
     Frame,
     FunctionCallResultFrame,
     InputAudioRawFrame,
+    InputTransportMessageFrame,
     InterimTranscriptionFrame,
+    LLMConfigureOutputFrame,
     LLMContextFrame,
     LLMFullResponseEndFrame,
     LLMFullResponseStartFrame,
     LLMMessagesAppendFrame,
     LLMTextFrame,
     MetricsFrame,
+    OutputTransportMessageUrgentFrame,
     StartFrame,
     SystemFrame,
     TranscriptionFrame,
-    TransportMessageUrgentFrame,
+    TTSAudioRawFrame,
     TTSStartedFrame,
     TTSStoppedFrame,
     TTSTextFrame,
@@ -587,10 +591,35 @@ class RTVILLMFunctionCallMessage(BaseModel):
     data: RTVILLMFunctionCallMessageData
 
 
+class RTVISendTextOptions(BaseModel):
+    """Options for sending text input to the LLM.
+
+    Contains options for how the pipeline should process the text input.
+    """
+
+    run_immediately: bool = True
+    audio_response: bool = True
+
+
+class RTVISendTextData(BaseModel):
+    """Data format for sending text input to the LLM.
+
+    Contains the text content to send and any options for how the pipeline should process it.
+
+    """
+
+    content: str
+    options: Optional[RTVISendTextOptions] = None
+
+
 class RTVIAppendToContextData(BaseModel):
     """Data format for appending messages to the context.
 
     Contains the role, content, and whether to run the message immediately.
+
+    .. deprecated:: 0.0.85
+        The RTVI message, append-to-context, has been deprecated. Use send-text
+        or custom client and server messages instead.
     """
 
     role: Literal["user", "assistant"] | str
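
For illustration only, a minimal sketch of the payload the new send-text path validates with RTVISendTextData; the text and option values below are hypothetical, not from the package:

    # Hypothetical example: a text-only turn (no TTS audio in the reply).
    data = RTVISendTextData(
        content="What's on my calendar today?",
        options=RTVISendTextOptions(run_immediately=True, audio_response=False),
    )
    # RTVIProcessor._handle_send_text (added further down in this diff) appends this
    # as a user message and temporarily sets skip_tts when audio_response is False.
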
@@ -814,6 +843,36 @@ class RTVIServerMessage(BaseModel):
     data: Any
 
 
+class RTVIAudioLevelMessageData(BaseModel):
+    """Data format for sending audio levels."""
+
+    value: float
+
+
+class RTVIUserAudioLevelMessage(BaseModel):
+    """Message indicating user audio level."""
+
+    label: RTVIMessageLiteral = RTVI_MESSAGE_LABEL
+    type: Literal["user-audio-level"] = "user-audio-level"
+    data: RTVIAudioLevelMessageData
+
+
+class RTVIBotAudioLevelMessage(BaseModel):
+    """Message indicating bot audio level."""
+
+    label: RTVIMessageLiteral = RTVI_MESSAGE_LABEL
+    type: Literal["bot-audio-level"] = "bot-audio-level"
+    data: RTVIAudioLevelMessageData
+
+
+class RTVISystemLogMessage(BaseModel):
+    """Message including a system log."""
+
+    label: RTVIMessageLiteral = RTVI_MESSAGE_LABEL
+    type: Literal["system-log"] = "system-log"
+    data: RTVITextMessageData
+
+
 @dataclass
 class RTVIServerMessageFrame(SystemFrame):
     """A frame for sending server messages to the client.
@@ -833,25 +892,36 @@ class RTVIServerMessageFrame(SystemFrame):
 class RTVIObserverParams:
     """Parameters for configuring RTVI Observer behavior.
 
+    .. deprecated:: 0.0.87
+        Parameter `errors_enabled` is deprecated. Error messages are always enabled.
+
     Parameters:
         bot_llm_enabled: Indicates if the bot's LLM messages should be sent.
         bot_tts_enabled: Indicates if the bot's TTS messages should be sent.
         bot_speaking_enabled: Indicates if the bot's started/stopped speaking messages should be sent.
+        bot_audio_level_enabled: Indicates if bot's audio level messages should be sent.
         user_llm_enabled: Indicates if the user's LLM input messages should be sent.
         user_speaking_enabled: Indicates if the user's started/stopped speaking messages should be sent.
        user_transcription_enabled: Indicates if user's transcription messages should be sent.
+        user_audio_level_enabled: Indicates if user's audio level messages should be sent.
         metrics_enabled: Indicates if metrics messages should be sent.
-        errors_enabled: Indicates if errors messages should be sent.
+        system_logs_enabled: Indicates if system logs should be sent.
+        errors_enabled: [Deprecated] Indicates if errors messages should be sent.
+        audio_level_period_secs: How often audio levels should be sent if enabled.
     """
 
     bot_llm_enabled: bool = True
     bot_tts_enabled: bool = True
     bot_speaking_enabled: bool = True
+    bot_audio_level_enabled: bool = False
     user_llm_enabled: bool = True
     user_speaking_enabled: bool = True
     user_transcription_enabled: bool = True
+    user_audio_level_enabled: bool = False
     metrics_enabled: bool = True
-    errors_enabled: bool = True
+    system_logs_enabled: bool = False
+    errors_enabled: Optional[bool] = None
+    audio_level_period_secs: float = 0.15
 
 
 class RTVIObserver(BaseObserver):
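
A minimal usage sketch of the new observer parameters, assuming an already-constructed RTVIProcessor instance named rtvi (hypothetical); errors_enabled is left unset since it is now deprecated:

    # Hypothetical example: enable the new audio-level and system-log messages.
    observer = RTVIObserver(
        rtvi,
        params=RTVIObserverParams(
            user_audio_level_enabled=True,
            bot_audio_level_enabled=True,
            audio_level_period_secs=0.25,  # send levels at most every 250 ms
            system_logs_enabled=True,      # forward loguru records as "system-log"
        ),
    )
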
@@ -867,7 +937,11 @@ class RTVIObserver(BaseObserver):
     """
 
     def __init__(
-        self, rtvi: "RTVIProcessor", *, params: Optional[RTVIObserverParams] = None, **kwargs
+        self,
+        rtvi: Optional["RTVIProcessor"] = None,
+        *,
+        params: Optional[RTVIObserverParams] = None,
+        **kwargs,
     ):
         """Initialize the RTVI observer.
 
@@ -879,9 +953,50 @@ class RTVIObserver(BaseObserver):
         super().__init__(**kwargs)
         self._rtvi = rtvi
         self._params = params or RTVIObserverParams()
-        self._bot_transcription = ""
+
         self._frames_seen = set()
-        rtvi.set_errors_enabled(self._params.errors_enabled)
+
+        self._bot_transcription = ""
+        self._last_user_audio_level = 0
+        self._last_bot_audio_level = 0
+
+        if self._params.system_logs_enabled:
+            self._system_logger_id = logger.add(self._logger_sink)
+
+        if self._params.errors_enabled is not None:
+            import warnings
+
+            with warnings.catch_warnings():
+                warnings.simplefilter("always")
+                warnings.warn(
+                    "Parameter `errors_enabled` is deprecated. Error messages are always enabled.",
+                    DeprecationWarning,
+                )
+
+    async def _logger_sink(self, message):
+        """Logger sink so we cna send system logs to RTVI clients."""
+        message = RTVISystemLogMessage(data=RTVITextMessageData(text=message))
+        await self.send_rtvi_message(message)
+
+    async def cleanup(self):
+        """Cleanup RTVI observer resources."""
+        await super().cleanup()
+        if self._params.system_logs_enabled:
+            logger.remove(self._system_logger_id)
+
+    async def send_rtvi_message(self, model: BaseModel, exclude_none: bool = True):
+        """Send an RTVI message.
+
+        By default, we push a transport frame. But this function can be
+        overriden by subclass to send RTVI messages in different ways.
+
+        Args:
+            model: The message to send.
+            exclude_none: Whether to exclude None values from the model dump.
+
+        """
+        if self._rtvi:
+            await self._rtvi.push_transport_message(model, exclude_none)
 
     async def on_push_frame(self, data: FramePushed):
         """Process a frame being pushed through the pipeline.
@@ -903,6 +1018,7 @@ class RTVIObserver(BaseObserver):
 
         if (
             isinstance(frame, (UserStartedSpeakingFrame, UserStoppedSpeakingFrame))
+            and (direction == FrameDirection.DOWNSTREAM)
             and self._params.user_speaking_enabled
         ):
             await self._handle_interruptions(frame)
@@ -923,52 +1039,58 @@ class RTVIObserver(BaseObserver):
         ):
             await self._handle_context(frame)
         elif isinstance(frame, LLMFullResponseStartFrame) and self._params.bot_llm_enabled:
-            await self.push_transport_message_urgent(RTVIBotLLMStartedMessage())
+            await self.send_rtvi_message(RTVIBotLLMStartedMessage())
         elif isinstance(frame, LLMFullResponseEndFrame) and self._params.bot_llm_enabled:
-            await self.push_transport_message_urgent(RTVIBotLLMStoppedMessage())
+            await self.send_rtvi_message(RTVIBotLLMStoppedMessage())
         elif isinstance(frame, LLMTextFrame) and self._params.bot_llm_enabled:
             await self._handle_llm_text_frame(frame)
         elif isinstance(frame, TTSStartedFrame) and self._params.bot_tts_enabled:
-            await self.push_transport_message_urgent(RTVIBotTTSStartedMessage())
+            await self.send_rtvi_message(RTVIBotTTSStartedMessage())
         elif isinstance(frame, TTSStoppedFrame) and self._params.bot_tts_enabled:
-            await self.push_transport_message_urgent(RTVIBotTTSStoppedMessage())
+            await self.send_rtvi_message(RTVIBotTTSStoppedMessage())
         elif isinstance(frame, TTSTextFrame) and self._params.bot_tts_enabled:
             if isinstance(src, BaseOutputTransport):
                 message = RTVIBotTTSTextMessage(data=RTVITextMessageData(text=frame.text))
-                await self.push_transport_message_urgent(message)
+                await self.send_rtvi_message(message)
             else:
                 mark_as_seen = False
         elif isinstance(frame, MetricsFrame) and self._params.metrics_enabled:
             await self._handle_metrics(frame)
         elif isinstance(frame, RTVIServerMessageFrame):
             message = RTVIServerMessage(data=frame.data)
-            await self.push_transport_message_urgent(message)
+            await self.send_rtvi_message(message)
         elif isinstance(frame, RTVIServerResponseFrame):
             if frame.error is not None:
                 await self._send_error_response(frame)
             else:
                 await self._send_server_response(frame)
+        elif isinstance(frame, InputAudioRawFrame) and self._params.user_audio_level_enabled:
+            curr_time = time.time()
+            diff_time = curr_time - self._last_user_audio_level
+            if diff_time > self._params.audio_level_period_secs:
+                level = calculate_audio_volume(frame.audio, frame.sample_rate)
+                message = RTVIUserAudioLevelMessage(data=RTVIAudioLevelMessageData(value=level))
+                await self.send_rtvi_message(message)
+                self._last_user_audio_level = curr_time
+        elif isinstance(frame, TTSAudioRawFrame) and self._params.bot_audio_level_enabled:
+            curr_time = time.time()
+            diff_time = curr_time - self._last_bot_audio_level
+            if diff_time > self._params.audio_level_period_secs:
+                level = calculate_audio_volume(frame.audio, frame.sample_rate)
+                message = RTVIBotAudioLevelMessage(data=RTVIAudioLevelMessageData(value=level))
+                await self.send_rtvi_message(message)
+                self._last_bot_audio_level = curr_time
 
         if mark_as_seen:
             self._frames_seen.add(frame.id)
 
-    async def push_transport_message_urgent(self, model: BaseModel, exclude_none: bool = True):
-        """Push an urgent transport message to the RTVI processor.
-
-        Args:
-            model: The message model to send.
-            exclude_none: Whether to exclude None values from the model dump.
-        """
-        frame = TransportMessageUrgentFrame(message=model.model_dump(exclude_none=exclude_none))
-        await self._rtvi.push_frame(frame)
-
     async def _push_bot_transcription(self):
         """Push accumulated bot transcription as a message."""
         if len(self._bot_transcription) > 0:
             message = RTVIBotTranscriptionMessage(
                 data=RTVITextMessageData(text=self._bot_transcription)
             )
-            await self.push_transport_message_urgent(message)
+            await self.send_rtvi_message(message)
             self._bot_transcription = ""
 
     async def _handle_interruptions(self, frame: Frame):
@@ -980,7 +1102,7 @@ class RTVIObserver(BaseObserver):
             message = RTVIUserStoppedSpeakingMessage()
 
         if message:
-            await self.push_transport_message_urgent(message)
+            await self.send_rtvi_message(message)
 
     async def _handle_bot_speaking(self, frame: Frame):
         """Handle bot speaking event frames."""
@@ -991,12 +1113,12 @@ class RTVIObserver(BaseObserver):
             message = RTVIBotStoppedSpeakingMessage()
 
         if message:
-            await self.push_transport_message_urgent(message)
+            await self.send_rtvi_message(message)
 
     async def _handle_llm_text_frame(self, frame: LLMTextFrame):
         """Handle LLM text output frames."""
         message = RTVIBotLLMTextMessage(data=RTVITextMessageData(text=frame.text))
-        await self.push_transport_message_urgent(message)
+        await self.send_rtvi_message(message)
 
         self._bot_transcription += frame.text
         if match_endofsentence(self._bot_transcription):
@@ -1019,7 +1141,7 @@ class RTVIObserver(BaseObserver):
             )
 
         if message:
-            await self.push_transport_message_urgent(message)
+            await self.send_rtvi_message(message)
 
     async def _handle_context(self, frame: OpenAILLMContextFrame | LLMContextFrame):
         """Process LLM context frames to extract user messages for the RTVI client."""
@@ -1039,7 +1161,7 @@ class RTVIObserver(BaseObserver):
                 text = "".join(part.text for part in message.parts if hasattr(part, "text"))
                 if text:
                     rtvi_message = RTVIUserLLMTextMessage(data=RTVITextMessageData(text=text))
-                    await self.push_transport_message_urgent(rtvi_message)
+                    await self.send_rtvi_message(rtvi_message)
 
             # Handle OpenAI format (original implementation)
             elif isinstance(message, dict):
@@ -1050,7 +1172,7 @@ class RTVIObserver(BaseObserver):
                 else:
                     text = content
                 rtvi_message = RTVIUserLLMTextMessage(data=RTVITextMessageData(text=text))
-                await self.push_transport_message_urgent(rtvi_message)
+                await self.send_rtvi_message(rtvi_message)
 
         except Exception as e:
             logger.warning(f"Caught an error while trying to handle context: {e}")
@@ -1077,7 +1199,7 @@ class RTVIObserver(BaseObserver):
                 metrics["characters"].append(d.model_dump(exclude_none=True))
 
         message = RTVIMetricsMessage(data=metrics)
-        await self.push_transport_message_urgent(message)
+        await self.send_rtvi_message(message)
 
     async def _send_server_response(self, frame: RTVIServerResponseFrame):
         """Send a response to the client for a specific request."""
@@ -1085,15 +1207,14 @@ class RTVIObserver(BaseObserver):
             id=str(frame.client_msg.msg_id),
             data=RTVIRawServerResponseData(t=frame.client_msg.type, d=frame.data),
         )
-        await self.push_transport_message_urgent(message)
+        await self.send_rtvi_message(message)
 
     async def _send_error_response(self, frame: RTVIServerResponseFrame):
         """Send a response to the client for a specific request."""
-        if self._params.errors_enabled:
-            message = RTVIErrorResponse(
-                id=str(frame.client_msg.msg_id), data=RTVIErrorResponseData(error=frame.error)
-            )
-            await self.push_transport_message_urgent(message)
+        message = RTVIErrorResponse(
+            id=str(frame.client_msg.msg_id), data=RTVIErrorResponseData(error=frame.error)
+        )
+        await self.send_rtvi_message(message)
 
 
 class RTVIProcessor(FrameProcessor):
@@ -1127,7 +1248,7 @@ class RTVIProcessor(FrameProcessor):
         # Default to 0.3.0 which is the last version before actually having a
         # "client-version".
         self._client_version = [0, 3, 0]
-        self._errors_enabled = True
+        self._skip_tts: bool = False  # Keep in sync with llm_service.py
 
         self._registered_actions: Dict[str, RTVIAction] = {}
         self._registered_services: Dict[str, RTVIService] = {}
@@ -1196,17 +1317,9 @@ class RTVIProcessor(FrameProcessor):
         await self._update_config(self._config, False)
         await self._send_bot_ready()
 
-    def set_errors_enabled(self, enabled: bool):
-        """Enable or disable error message sending.
-
-        Args:
-            enabled: Whether to send error messages.
-        """
-        self._errors_enabled = enabled
-
     async def interrupt_bot(self):
         """Send a bot interruption frame upstream."""
-        await self.push_frame(BotInterruptionFrame(), FrameDirection.UPSTREAM)
+        await self.push_interruption_task_frame_and_wait()
 
     async def send_server_message(self, data: Any):
         """Send a server message to the client."""
@@ -1232,6 +1345,13 @@ class RTVIProcessor(FrameProcessor):
         """
         await self._send_error_frame(ErrorFrame(error=error))
 
+    async def push_transport_message(self, model: BaseModel, exclude_none: bool = True):
+        """Push a transport message frame."""
+        frame = OutputTransportMessageUrgentFrame(
+            message=model.model_dump(exclude_none=exclude_none)
+        )
+        await self.push_frame(frame)
+
     async def handle_message(self, message: RTVIMessage):
         """Handle an incoming RTVI message.
 
@@ -1252,7 +1372,7 @@ class RTVIProcessor(FrameProcessor):
             args=params.arguments,
         )
         message = RTVILLMFunctionCallMessage(data=fn)
-        await self._push_transport_message(message, exclude_none=False)
+        await self.push_transport_message(message, exclude_none=False)
 
     async def handle_function_call_start(
         self, function_name: str, llm: FrameProcessor, context: OpenAILLMContext
@@ -1279,7 +1399,7 @@ class RTVIProcessor(FrameProcessor):
 
         fn = RTVILLMFunctionCallStartMessageData(function_name=function_name)
         message = RTVILLMFunctionCallStartMessage(data=fn)
-        await self._push_transport_message(message, exclude_none=False)
+        await self.push_transport_message(message, exclude_none=False)
 
     async def process_frame(self, frame: Frame, direction: FrameDirection):
         """Process incoming frames through the RTVI processor.
@@ -1302,7 +1422,7 @@ class RTVIProcessor(FrameProcessor):
         elif isinstance(frame, ErrorFrame):
             await self._send_error_frame(frame)
             await self.push_frame(frame, direction)
-        elif isinstance(frame, TransportMessageUrgentFrame):
+        elif isinstance(frame, InputTransportMessageFrame):
             await self._handle_transport_message(frame)
         # All other system frames
         elif isinstance(frame, SystemFrame):
@@ -1316,6 +1436,9 @@ class RTVIProcessor(FrameProcessor):
         # Data frames
         elif isinstance(frame, RTVIActionFrame):
             await self._action_queue.put(frame)
+        elif isinstance(frame, LLMConfigureOutputFrame):
+            self._skip_tts = frame.skip_tts
+            await self.push_frame(frame, direction)
         # Other frames
         else:
             await self.push_frame(frame, direction)
@@ -1348,11 +1471,6 @@ class RTVIProcessor(FrameProcessor):
             await self.cancel_task(self._message_task)
             self._message_task = None
 
-    async def _push_transport_message(self, model: BaseModel, exclude_none: bool = True):
-        """Push a transport message frame."""
-        frame = TransportMessageUrgentFrame(message=model.model_dump(exclude_none=exclude_none))
-        await self.push_frame(frame)
-
     async def _action_task_handler(self):
         """Handle incoming action frames."""
         while True:
@@ -1367,7 +1485,7 @@ class RTVIProcessor(FrameProcessor):
             await self._handle_message(message)
             self._message_queue.task_done()
 
-    async def _handle_transport_message(self, frame: TransportMessageUrgentFrame):
+    async def _handle_transport_message(self, frame: InputTransportMessageFrame):
         """Handle an incoming transport message frame."""
         try:
             transport_message = frame.message
@@ -1415,7 +1533,13 @@ class RTVIProcessor(FrameProcessor):
                 case "llm-function-call-result":
                     data = RTVILLMFunctionCallResultData.model_validate(message.data)
                     await self._handle_function_call_result(data)
+                case "send-text":
+                    data = RTVISendTextData.model_validate(message.data)
+                    await self._handle_send_text(data)
                 case "append-to-context":
+                    logger.warning(
+                        f"The append-to-context message is deprecated, use send-text instead."
+                    )
                    data = RTVIAppendToContextData.model_validate(message.data)
                     await self._handle_update_context(data)
                 case "raw-audio" | "raw-audio-batch":
@@ -1483,7 +1607,7 @@ class RTVIProcessor(FrameProcessor):
 
         services = list(self._registered_services.values())
         message = RTVIDescribeConfig(id=request_id, data=RTVIDescribeConfigData(config=services))
-        await self._push_transport_message(message)
+        await self.push_transport_message(message)
 
     async def _handle_describe_actions(self, request_id: str):
         """Handle a describe-actions request."""
@@ -1498,7 +1622,7 @@ class RTVIProcessor(FrameProcessor):
 
         actions = list(self._registered_actions.values())
         message = RTVIDescribeActions(id=request_id, data=RTVIDescribeActionsData(actions=actions))
-        await self._push_transport_message(message)
+        await self.push_transport_message(message)
 
     async def _handle_get_config(self, request_id: str):
         """Handle a get-config request."""
@@ -1512,7 +1636,7 @@ class RTVIProcessor(FrameProcessor):
         )
 
         message = RTVIConfigResponse(id=request_id, data=self._config)
-        await self._push_transport_message(message)
+        await self.push_transport_message(message)
 
     def _update_config_option(self, service: str, config: RTVIServiceOptionConfig):
         """Update a specific configuration option."""
@@ -1564,6 +1688,26 @@ class RTVIProcessor(FrameProcessor):
         await self._update_config(RTVIConfig(config=data.config), data.interrupt)
         await self._handle_get_config(request_id)
 
+    async def _handle_send_text(self, data: RTVISendTextData):
+        """Handle a send-text message from the client."""
+        opts = data.options if data.options is not None else RTVISendTextOptions()
+        if opts.run_immediately:
+            await self.interrupt_bot()
+        cur_skip_tts = self._skip_tts
+        should_skip_tts = not opts.audio_response
+        toggle_skip_tts = cur_skip_tts != should_skip_tts
+        if toggle_skip_tts:
+            output_frame = LLMConfigureOutputFrame(skip_tts=should_skip_tts)
+            await self.push_frame(output_frame)
+        text_frame = LLMMessagesAppendFrame(
+            messages=[{"role": "user", "content": data.content}],
+            run_llm=opts.run_immediately,
+        )
+        await self.push_frame(text_frame)
+        if toggle_skip_tts:
+            output_frame = LLMConfigureOutputFrame(skip_tts=cur_skip_tts)
+            await self.push_frame(output_frame)
+
     async def _handle_update_context(self, data: RTVIAppendToContextData):
         if data.run_immediately:
             await self.interrupt_bot()
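
For reference, a sketch of the downstream frame order this handler produces when audio_response is False and skip_tts was previously off; the example text is hypothetical and this restates the hunk above rather than adding package code:

    # Frames pushed for audio_response=False (previous skip_tts was False):
    expected_frames = [
        LLMConfigureOutputFrame(skip_tts=True),   # mute TTS for this turn
        LLMMessagesAppendFrame(
            messages=[{"role": "user", "content": "example text"}],
            run_llm=True,                         # opts.run_immediately
        ),
        LLMConfigureOutputFrame(skip_tts=False),  # restore the previous state
    ]
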
@@ -1617,7 +1761,7 @@ class RTVIProcessor(FrameProcessor):
         # action responses (such as webhooks) don't set a request_id
         if request_id:
             message = RTVIActionResponse(id=request_id, data=RTVIActionResponseData(result=result))
-            await self._push_transport_message(message)
+            await self.push_transport_message(message)
 
     async def _send_bot_ready(self):
         """Send the bot-ready message to the client."""
@@ -1628,23 +1772,21 @@ class RTVIProcessor(FrameProcessor):
             id=self._client_ready_id,
             data=RTVIBotReadyData(version=RTVI_PROTOCOL_VERSION, config=config),
         )
-        await self._push_transport_message(message)
+        await self.push_transport_message(message)
 
     async def _send_server_message(self, message: RTVIServerMessage | RTVIServerResponse):
         """Send a message or response to the client."""
-        await self._push_transport_message(message)
+        await self.push_transport_message(message)
 
     async def _send_error_frame(self, frame: ErrorFrame):
         """Send an error frame as an RTVI error message."""
-        if self._errors_enabled:
-            message = RTVIError(data=RTVIErrorData(error=frame.error, fatal=frame.fatal))
-            await self._push_transport_message(message)
+        message = RTVIError(data=RTVIErrorData(error=frame.error, fatal=frame.fatal))
+        await self.push_transport_message(message)
 
     async def _send_error_response(self, id: str, error: str):
         """Send an error response message."""
-        if self._errors_enabled:
-            message = RTVIErrorResponse(id=id, data=RTVIErrorResponseData(error=error))
-            await self._push_transport_message(message)
+        message = RTVIErrorResponse(id=id, data=RTVIErrorResponseData(error=error))
+        await self.push_transport_message(message)
 
     def _action_id(self, service: str, action: str) -> str:
         """Generate an action ID from service and action names."""