dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (195) hide show
  1. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
  2. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/dtmf/dtmf-0.wav +0 -0
  11. pipecat/audio/dtmf/dtmf-1.wav +0 -0
  12. pipecat/audio/dtmf/dtmf-2.wav +0 -0
  13. pipecat/audio/dtmf/dtmf-3.wav +0 -0
  14. pipecat/audio/dtmf/dtmf-4.wav +0 -0
  15. pipecat/audio/dtmf/dtmf-5.wav +0 -0
  16. pipecat/audio/dtmf/dtmf-6.wav +0 -0
  17. pipecat/audio/dtmf/dtmf-7.wav +0 -0
  18. pipecat/audio/dtmf/dtmf-8.wav +0 -0
  19. pipecat/audio/dtmf/dtmf-9.wav +0 -0
  20. pipecat/audio/dtmf/dtmf-pound.wav +0 -0
  21. pipecat/audio/dtmf/dtmf-star.wav +0 -0
  22. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  23. pipecat/audio/filters/noisereduce_filter.py +15 -0
  24. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  25. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  26. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  27. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  28. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  29. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  30. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  31. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  32. pipecat/audio/vad/data/README.md +10 -0
  33. pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
  34. pipecat/audio/vad/silero.py +9 -3
  35. pipecat/audio/vad/vad_analyzer.py +13 -1
  36. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  37. pipecat/frames/frames.py +277 -86
  38. pipecat/observers/loggers/debug_log_observer.py +3 -3
  39. pipecat/observers/loggers/llm_log_observer.py +7 -3
  40. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  41. pipecat/pipeline/runner.py +18 -6
  42. pipecat/pipeline/service_switcher.py +64 -36
  43. pipecat/pipeline/task.py +125 -79
  44. pipecat/pipeline/tts_switcher.py +30 -0
  45. pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
  46. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  47. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  48. pipecat/processors/aggregators/llm_context.py +40 -2
  49. pipecat/processors/aggregators/llm_response.py +32 -15
  50. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  51. pipecat/processors/aggregators/user_response.py +6 -6
  52. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  53. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  54. pipecat/processors/dtmf_aggregator.py +174 -77
  55. pipecat/processors/filters/stt_mute_filter.py +17 -0
  56. pipecat/processors/frame_processor.py +110 -24
  57. pipecat/processors/frameworks/langchain.py +8 -2
  58. pipecat/processors/frameworks/rtvi.py +210 -68
  59. pipecat/processors/frameworks/strands_agents.py +170 -0
  60. pipecat/processors/logger.py +2 -2
  61. pipecat/processors/transcript_processor.py +26 -5
  62. pipecat/processors/user_idle_processor.py +35 -11
  63. pipecat/runner/daily.py +59 -20
  64. pipecat/runner/run.py +395 -93
  65. pipecat/runner/types.py +6 -4
  66. pipecat/runner/utils.py +51 -10
  67. pipecat/serializers/__init__.py +5 -1
  68. pipecat/serializers/asterisk.py +16 -2
  69. pipecat/serializers/convox.py +41 -4
  70. pipecat/serializers/custom.py +257 -0
  71. pipecat/serializers/exotel.py +5 -5
  72. pipecat/serializers/livekit.py +20 -0
  73. pipecat/serializers/plivo.py +5 -5
  74. pipecat/serializers/protobuf.py +6 -5
  75. pipecat/serializers/telnyx.py +2 -2
  76. pipecat/serializers/twilio.py +43 -23
  77. pipecat/serializers/vi.py +324 -0
  78. pipecat/services/ai_service.py +2 -6
  79. pipecat/services/anthropic/llm.py +2 -25
  80. pipecat/services/assemblyai/models.py +6 -0
  81. pipecat/services/assemblyai/stt.py +13 -5
  82. pipecat/services/asyncai/tts.py +5 -3
  83. pipecat/services/aws/__init__.py +1 -0
  84. pipecat/services/aws/llm.py +147 -105
  85. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  86. pipecat/services/aws/nova_sonic/context.py +436 -0
  87. pipecat/services/aws/nova_sonic/frames.py +25 -0
  88. pipecat/services/aws/nova_sonic/llm.py +1265 -0
  89. pipecat/services/aws/stt.py +3 -3
  90. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  91. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  92. pipecat/services/aws_nova_sonic/context.py +8 -354
  93. pipecat/services/aws_nova_sonic/frames.py +13 -17
  94. pipecat/services/azure/llm.py +51 -1
  95. pipecat/services/azure/realtime/__init__.py +0 -0
  96. pipecat/services/azure/realtime/llm.py +65 -0
  97. pipecat/services/azure/stt.py +15 -0
  98. pipecat/services/cartesia/stt.py +77 -70
  99. pipecat/services/cartesia/tts.py +80 -13
  100. pipecat/services/deepgram/__init__.py +1 -0
  101. pipecat/services/deepgram/flux/__init__.py +0 -0
  102. pipecat/services/deepgram/flux/stt.py +640 -0
  103. pipecat/services/elevenlabs/__init__.py +4 -1
  104. pipecat/services/elevenlabs/stt.py +339 -0
  105. pipecat/services/elevenlabs/tts.py +87 -46
  106. pipecat/services/fish/tts.py +5 -2
  107. pipecat/services/gemini_multimodal_live/events.py +38 -524
  108. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  109. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  110. pipecat/services/gladia/stt.py +56 -72
  111. pipecat/services/google/__init__.py +1 -0
  112. pipecat/services/google/gemini_live/__init__.py +3 -0
  113. pipecat/services/google/gemini_live/file_api.py +189 -0
  114. pipecat/services/google/gemini_live/llm.py +1582 -0
  115. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  116. pipecat/services/google/llm.py +15 -11
  117. pipecat/services/google/llm_openai.py +3 -3
  118. pipecat/services/google/llm_vertex.py +86 -16
  119. pipecat/services/google/stt.py +4 -0
  120. pipecat/services/google/tts.py +7 -3
  121. pipecat/services/heygen/api.py +2 -0
  122. pipecat/services/heygen/client.py +8 -4
  123. pipecat/services/heygen/video.py +2 -0
  124. pipecat/services/hume/__init__.py +5 -0
  125. pipecat/services/hume/tts.py +220 -0
  126. pipecat/services/inworld/tts.py +6 -6
  127. pipecat/services/llm_service.py +15 -5
  128. pipecat/services/lmnt/tts.py +4 -2
  129. pipecat/services/mcp_service.py +4 -2
  130. pipecat/services/mem0/memory.py +6 -5
  131. pipecat/services/mistral/llm.py +29 -8
  132. pipecat/services/moondream/vision.py +42 -16
  133. pipecat/services/neuphonic/tts.py +5 -2
  134. pipecat/services/openai/__init__.py +1 -0
  135. pipecat/services/openai/base_llm.py +27 -20
  136. pipecat/services/openai/realtime/__init__.py +0 -0
  137. pipecat/services/openai/realtime/context.py +272 -0
  138. pipecat/services/openai/realtime/events.py +1106 -0
  139. pipecat/services/openai/realtime/frames.py +37 -0
  140. pipecat/services/openai/realtime/llm.py +829 -0
  141. pipecat/services/openai/tts.py +49 -10
  142. pipecat/services/openai_realtime/__init__.py +27 -0
  143. pipecat/services/openai_realtime/azure.py +21 -0
  144. pipecat/services/openai_realtime/context.py +21 -0
  145. pipecat/services/openai_realtime/events.py +21 -0
  146. pipecat/services/openai_realtime/frames.py +21 -0
  147. pipecat/services/openai_realtime_beta/azure.py +16 -0
  148. pipecat/services/openai_realtime_beta/openai.py +17 -5
  149. pipecat/services/piper/tts.py +7 -9
  150. pipecat/services/playht/tts.py +34 -4
  151. pipecat/services/rime/tts.py +12 -12
  152. pipecat/services/riva/stt.py +3 -1
  153. pipecat/services/salesforce/__init__.py +9 -0
  154. pipecat/services/salesforce/llm.py +700 -0
  155. pipecat/services/sarvam/__init__.py +7 -0
  156. pipecat/services/sarvam/stt.py +540 -0
  157. pipecat/services/sarvam/tts.py +97 -13
  158. pipecat/services/simli/video.py +2 -2
  159. pipecat/services/speechmatics/stt.py +22 -10
  160. pipecat/services/stt_service.py +47 -0
  161. pipecat/services/tavus/video.py +2 -2
  162. pipecat/services/tts_service.py +75 -22
  163. pipecat/services/vision_service.py +7 -6
  164. pipecat/services/vistaar/llm.py +51 -9
  165. pipecat/tests/utils.py +4 -4
  166. pipecat/transcriptions/language.py +41 -1
  167. pipecat/transports/base_input.py +13 -34
  168. pipecat/transports/base_output.py +140 -104
  169. pipecat/transports/daily/transport.py +199 -26
  170. pipecat/transports/heygen/__init__.py +0 -0
  171. pipecat/transports/heygen/transport.py +381 -0
  172. pipecat/transports/livekit/transport.py +228 -63
  173. pipecat/transports/local/audio.py +6 -1
  174. pipecat/transports/local/tk.py +11 -2
  175. pipecat/transports/network/fastapi_websocket.py +1 -1
  176. pipecat/transports/smallwebrtc/connection.py +103 -19
  177. pipecat/transports/smallwebrtc/request_handler.py +246 -0
  178. pipecat/transports/smallwebrtc/transport.py +65 -23
  179. pipecat/transports/tavus/transport.py +23 -12
  180. pipecat/transports/websocket/client.py +41 -5
  181. pipecat/transports/websocket/fastapi.py +21 -11
  182. pipecat/transports/websocket/server.py +14 -7
  183. pipecat/transports/whatsapp/api.py +8 -0
  184. pipecat/transports/whatsapp/client.py +47 -0
  185. pipecat/utils/base_object.py +54 -22
  186. pipecat/utils/redis.py +58 -0
  187. pipecat/utils/string.py +13 -1
  188. pipecat/utils/tracing/service_decorators.py +21 -21
  189. pipecat/serializers/genesys.py +0 -95
  190. pipecat/services/google/test-google-chirp.py +0 -45
  191. pipecat/services/openai.py +0 -698
  192. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
  193. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
  194. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
  195. /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
@@ -15,11 +15,12 @@ import pipecat.frames.protobufs.frames_pb2 as frame_protos
15
15
  from pipecat.frames.frames import (
16
16
  Frame,
17
17
  InputAudioRawFrame,
18
+ InputTransportMessageFrame,
18
19
  OutputAudioRawFrame,
20
+ OutputTransportMessageFrame,
21
+ OutputTransportMessageUrgentFrame,
19
22
  TextFrame,
20
23
  TranscriptionFrame,
21
- TransportMessageFrame,
22
- TransportMessageUrgentFrame,
23
24
  )
24
25
  from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
25
26
 
@@ -82,7 +83,7 @@ class ProtobufFrameSerializer(FrameSerializer):
82
83
  Serialized frame as bytes, or None if frame type is not serializable.
83
84
  """
84
85
  # Wrapping this messages as a JSONFrame to send
85
- if isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
86
+ if isinstance(frame, (OutputTransportMessageFrame, OutputTransportMessageUrgentFrame)):
86
87
  frame = MessageFrame(
87
88
  data=json.dumps(frame.message),
88
89
  )
@@ -134,11 +135,11 @@ class ProtobufFrameSerializer(FrameSerializer):
134
135
  if "pts" in args_dict:
135
136
  del args_dict["pts"]
136
137
 
137
- # Special handling for MessageFrame -> TransportMessageUrgentFrame
138
+ # Special handling for MessageFrame -> OutputTransportMessageUrgentFrame
138
139
  if class_name == MessageFrame:
139
140
  try:
140
141
  msg = json.loads(args_dict["data"])
141
- instance = TransportMessageUrgentFrame(message=msg)
142
+ instance = InputTransportMessageFrame(message=msg)
142
143
  logger.debug(f"ProtobufFrameSerializer: Transport message {instance}")
143
144
  except Exception as e:
144
145
  logger.error(f"Error parsing MessageFrame data: {e}")
@@ -29,8 +29,8 @@ from pipecat.frames.frames import (
29
29
  Frame,
30
30
  InputAudioRawFrame,
31
31
  InputDTMFFrame,
32
+ InterruptionFrame,
32
33
  StartFrame,
33
- StartInterruptionFrame,
34
34
  )
35
35
  from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
36
36
 
@@ -137,7 +137,7 @@ class TelnyxFrameSerializer(FrameSerializer):
137
137
  self._hangup_attempted = True
138
138
  await self._hang_up_call()
139
139
  return None
140
- elif isinstance(frame, StartInterruptionFrame):
140
+ elif isinstance(frame, InterruptionFrame):
141
141
  answer = {"event": "clear"}
142
142
  return json.dumps(answer)
143
143
  elif isinstance(frame, AudioRawFrame):
@@ -22,10 +22,10 @@ from pipecat.frames.frames import (
22
22
  Frame,
23
23
  InputAudioRawFrame,
24
24
  InputDTMFFrame,
25
+ InterruptionFrame,
26
+ OutputTransportMessageFrame,
27
+ OutputTransportMessageUrgentFrame,
25
28
  StartFrame,
26
- StartInterruptionFrame,
27
- TransportMessageFrame,
28
- TransportMessageUrgentFrame,
29
29
  )
30
30
  from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
31
31
 
@@ -61,6 +61,8 @@ class TwilioFrameSerializer(FrameSerializer):
61
61
  call_sid: Optional[str] = None,
62
62
  account_sid: Optional[str] = None,
63
63
  auth_token: Optional[str] = None,
64
+ region: Optional[str] = None,
65
+ edge: Optional[str] = None,
64
66
  params: Optional[InputParams] = None,
65
67
  ):
66
68
  """Initialize the TwilioFrameSerializer.
@@ -70,13 +72,42 @@ class TwilioFrameSerializer(FrameSerializer):
70
72
  call_sid: The associated Twilio Call SID (optional, but required for auto hang-up).
71
73
  account_sid: Twilio account SID (required for auto hang-up).
72
74
  auth_token: Twilio auth token (required for auto hang-up).
75
+ region: Twilio region (e.g., "au1", "ie1"). Must be specified with edge.
76
+ edge: Twilio edge location (e.g., "sydney", "dublin"). Must be specified with region.
73
77
  params: Configuration parameters.
74
78
  """
79
+ self._params = params or TwilioFrameSerializer.InputParams()
80
+
81
+ # Validate hangup-related parameters if auto_hang_up is enabled
82
+ if self._params.auto_hang_up:
83
+ # Validate required credentials
84
+ missing_credentials = []
85
+ if not call_sid:
86
+ missing_credentials.append("call_sid")
87
+ if not account_sid:
88
+ missing_credentials.append("account_sid")
89
+ if not auth_token:
90
+ missing_credentials.append("auth_token")
91
+
92
+ if missing_credentials:
93
+ raise ValueError(
94
+ f"auto_hang_up is enabled but missing required parameters: {', '.join(missing_credentials)}"
95
+ )
96
+
97
+ # Validate region and edge are both provided if either is specified
98
+ if (region and not edge) or (edge and not region):
99
+ raise ValueError(
100
+ "Both edge and region parameters are required if one is set. "
101
+ f"Twilio's FQDN format requires both: api.{{edge}}.{{region}}.twilio.com. "
102
+ f"Got: region='{region}', edge='{edge}'"
103
+ )
104
+
75
105
  self._stream_sid = stream_sid
76
106
  self._call_sid = call_sid
77
107
  self._account_sid = account_sid
78
108
  self._auth_token = auth_token
79
- self._params = params or TwilioFrameSerializer.InputParams()
109
+ self._region = region
110
+ self._edge = edge
80
111
 
81
112
  self._twilio_sample_rate = self._params.twilio_sample_rate
82
113
  self._sample_rate = 0 # Pipeline input rate
@@ -122,7 +153,7 @@ class TwilioFrameSerializer(FrameSerializer):
122
153
  self._hangup_attempted = True
123
154
  await self._hang_up_call()
124
155
  return None
125
- elif isinstance(frame, StartInterruptionFrame):
156
+ elif isinstance(frame, InterruptionFrame):
126
157
  answer = {"event": "clear", "streamSid": self._stream_sid}
127
158
  return json.dumps(answer)
128
159
  elif isinstance(frame, AudioRawFrame):
@@ -135,7 +166,7 @@ class TwilioFrameSerializer(FrameSerializer):
135
166
  if serialized_data is None or len(serialized_data) == 0:
136
167
  # Ignoring in case we don't have audio
137
168
  return None
138
-
169
+
139
170
  payload = base64.b64encode(serialized_data).decode("utf-8")
140
171
  answer = {
141
172
  "event": "media",
@@ -144,7 +175,7 @@ class TwilioFrameSerializer(FrameSerializer):
144
175
  }
145
176
 
146
177
  return json.dumps(answer)
147
- elif isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
178
+ elif isinstance(frame, (OutputTransportMessageFrame, OutputTransportMessageUrgentFrame)):
148
179
  return json.dumps(frame.message)
149
180
 
150
181
  # Return None for unhandled frames
@@ -158,25 +189,14 @@ class TwilioFrameSerializer(FrameSerializer):
158
189
  account_sid = self._account_sid
159
190
  auth_token = self._auth_token
160
191
  call_sid = self._call_sid
192
+ region = self._region
193
+ edge = self._edge
161
194
 
162
- if not call_sid or not account_sid or not auth_token:
163
- missing = []
164
- if not call_sid:
165
- missing.append("call_sid")
166
- if not account_sid:
167
- missing.append("account_sid")
168
- if not auth_token:
169
- missing.append("auth_token")
170
-
171
- logger.warning(
172
- f"Cannot hang up Twilio call: missing required parameters: {', '.join(missing)}"
173
- )
174
- return
195
+ region_prefix = f"{region}." if region else ""
196
+ edge_prefix = f"{edge}." if edge else ""
175
197
 
176
198
  # Twilio API endpoint for updating calls
177
- endpoint = (
178
- f"https://api.twilio.com/2010-04-01/Accounts/{account_sid}/Calls/{call_sid}.json"
179
- )
199
+ endpoint = f"https://api.{edge_prefix}{region_prefix}twilio.com/2010-04-01/Accounts/{account_sid}/Calls/{call_sid}.json"
180
200
 
181
201
  # Create basic auth from account_sid and auth_token
182
202
  auth = aiohttp.BasicAuth(account_sid, auth_token)
@@ -0,0 +1,324 @@
1
#
# Copyright (c) 2024–2025, Daily
#
# SPDX-License-Identifier: BSD 2-Clause License
#

"""Vodafone Idea (VI) WebSocket frame serializer for audio streaming and call management."""

import base64
import json
from datetime import datetime, timezone
from typing import Optional

from loguru import logger
from pydantic import BaseModel

from pipecat.audio.utils import create_default_resampler
from pipecat.frames.frames import (
    AudioRawFrame,
    CancelFrame,
    EndFrame,
    Frame,
    InputAudioRawFrame,
    InputDTMFFrame,
    InterruptionFrame,
    KeypadEntry,
    OutputTransportMessageFrame,
    OutputTransportMessageUrgentFrame,
    StartFrame,
)
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType


class VIFrameSerializer(FrameSerializer):
    """Serializer for Vodafone Idea (VI) WebSocket protocol.

    This serializer handles converting between Pipecat frames and VI's WebSocket
    protocol for bidirectional audio streaming. It supports audio conversion, DTMF events,
    and real-time communication with VI telephony systems.

    VI WebSocket protocol requirements:

    - PCM audio format at 8kHz sample rate
    - 16-bit Linear PCM encoding
    - Base64 encoded audio payloads
    - JSON message format for control and media events
    - Bitrate: 128 Kbps

    Events (VI → Endpoint):

    - connected: WebSocket connection established
    - start: Stream session started with call/stream IDs
    - media: Audio data in Base64-encoded PCM
    - dtmf: Keypad digit pressed
    - stop: Stream ended
    - mark: Audio playback checkpoint confirmation

    Events (Endpoint → VI):

    - media: Send audio back to VI
    - mark: Request acknowledgment for audio playback
    - clear: Clear queued audio (interruption)
    - exit: Terminate session gracefully
    """

    class InputParams(BaseModel):
        """Configuration parameters for VIFrameSerializer.

        Attributes:
            vi_sample_rate: Sample rate used by VI, defaults to 8000 Hz (telephony standard).
            sample_rate: Optional override for pipeline input sample rate.
            auto_hang_up: Whether to automatically terminate call on EndFrame.
        """

        vi_sample_rate: int = 8000
        sample_rate: Optional[int] = None
        auto_hang_up: bool = False

    def __init__(
        self,
        stream_id: str,
        call_id: Optional[str] = None,
        params: Optional[InputParams] = None,
    ):
        """Initialize the VIFrameSerializer.

        Args:
            stream_id: The VI stream identifier.
            call_id: The associated VI call identifier.
            params: Configuration parameters.
        """
        self._stream_id = stream_id
        self._call_id = call_id
        self._params = params or VIFrameSerializer.InputParams()

        self._vi_sample_rate = self._params.vi_sample_rate
        self._sample_rate = 0  # Pipeline input rate; set in setup() from the StartFrame.
        self._call_ended = False

        self._resampler = create_default_resampler()

    @property
    def type(self) -> FrameSerializerType:
        """Gets the serializer type.

        Returns:
            The serializer type as TEXT for JSON WebSocket messages.
        """
        return FrameSerializerType.TEXT

    async def setup(self, frame: StartFrame):
        """Sets up the serializer with pipeline configuration.

        Args:
            frame: The StartFrame containing pipeline configuration.
        """
        self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate

    async def serialize(self, frame: Frame) -> str | bytes | None:
        """Serializes a Pipecat frame to VI WebSocket format.

        Handles conversion of various frame types to VI WebSocket messages.
        For EndFrames, initiates call termination if auto_hang_up is enabled.

        Args:
            frame: The Pipecat frame to serialize.

        Returns:
            Serialized data as JSON string, or None if the frame isn't handled.
        """
        if (
            self._params.auto_hang_up
            and not self._call_ended
            and isinstance(frame, (EndFrame, CancelFrame))
        ):
            self._call_ended = True
            # Return the exit event to terminate the VI session
            return await self._send_exit_event()

        elif isinstance(frame, InterruptionFrame):
            # Clear/interrupt command for VI - clears queued audio
            message = {
                "event": "clear",
                "stream_id": self._stream_id,
                "call_id": self._call_id,
            }
            logger.debug(f"VI: Sending clear event for stream_id: {self._stream_id}")
            return json.dumps(message)

        elif isinstance(frame, AudioRawFrame):
            if self._call_ended:
                logger.debug("VI SERIALIZE: Skipping audio - call has ended")
                return None

            # Convert PCM audio to VI format
            data = frame.audio

            # Resample to VI sample rate (8kHz)
            serialized_data = await self._resampler.resample(
                data, frame.sample_rate, self._vi_sample_rate
            )

            # Encode as base64 for transmission
            payload = base64.b64encode(serialized_data).decode("ascii")

            # VI expects media event format with Base64-encoded PCM audio
            timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")

            message = {
                "event": "media",
                "stream_id": self._stream_id,
                "media": {
                    "timestamp": timestamp,
                    "chunk": len(serialized_data),  # Chunk size in bytes
                    "payload": payload,
                },
            }

            return json.dumps(message)

        elif isinstance(frame, (OutputTransportMessageFrame, OutputTransportMessageUrgentFrame)):
            # Pass through transport messages (for mark events, etc.)
            return json.dumps(frame.message)

        return None

    async def _send_exit_event(self):
        """Send an exit event to VI to terminate the session gracefully.

        This method is called when auto_hang_up is enabled and an EndFrame or
        CancelFrame is received. The exit event allows IVR logic to continue
        after the WebSocket session ends.

        Returns:
            The JSON-encoded exit event, or None if building it failed.
        """
        try:
            exit_event = {
                "event": "exit",
                "stream_id": self._stream_id,
                "call_id": self._call_id,
                "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
            }

            logger.info(
                f"VI auto_hang_up: Sending exit event for stream_id: {self._stream_id}, call_id: {self._call_id}"
            )
            return json.dumps(exit_event)
        except Exception as e:
            logger.error(f"VI auto_hang_up: Failed to create exit event: {e}")
            return None

    async def deserialize(self, data: str | bytes) -> Frame | None:
        """Deserializes VI WebSocket data to Pipecat frames.

        Handles conversion of VI media events to appropriate Pipecat frames.

        Args:
            data: The raw WebSocket data from VI.

        Returns:
            A Pipecat frame corresponding to the VI event, or None if unhandled.
        """
        try:
            message = json.loads(data)
        except json.JSONDecodeError:
            logger.error(f"Invalid JSON received from VI: {data}")
            return None

        # Log all incoming events for debugging and monitoring
        event = message.get("event")
        logger.debug(
            f"VI INCOMING EVENT: {event} - stream_id: {self._stream_id}, call_id: {self._call_id}"
        )

        if event == "media":
            # Handle incoming audio data from VI
            media = message.get("media", {})
            payload_base64 = media.get("payload")

            if not payload_base64:
                logger.warning("VI DESERIALIZE: No payload in VI media message")
                return None

            try:
                payload = base64.b64decode(payload_base64)
                chunk_size = len(payload)

                # Log chunk info (optional)
                logger.debug(
                    f"VI DESERIALIZE: Received audio from VI - {chunk_size} bytes at {self._vi_sample_rate}Hz"
                )

            except Exception as e:
                logger.error(f"VI DESERIALIZE: Error decoding VI audio payload: {e}")
                return None

            # Convert from VI sample rate (8kHz) to pipeline sample rate
            deserialized_data = await self._resampler.resample(
                payload,
                self._vi_sample_rate,
                self._sample_rate,
            )

            audio_frame = InputAudioRawFrame(
                audio=deserialized_data,
                num_channels=1,  # VI uses mono audio
                sample_rate=self._sample_rate,
            )
            return audio_frame

        elif event == "dtmf":
            # Handle DTMF events
            dtmf_data = message.get("dtmf", {})
            digit = dtmf_data.get("digit")

            if digit:
                try:
                    logger.info(f"VI: Received DTMF digit: {digit}")
                    return InputDTMFFrame(KeypadEntry(digit))
                except ValueError:
                    logger.warning(f"Invalid DTMF digit from VI: {digit}")
            return None

        elif event == "connected":
            # Handle connection event
            logger.info(f"VI connection established: {message}")
            return None

        elif event == "start":
            # Handle stream start event
            logger.info(f"VI stream started: {message}")
            return None

        elif event == "stop":
            # Handle stream stop event
            logger.info(f"VI stream stopped: {message}")
            # Don't end the call here, wait for explicit exit or call end
            return None

        elif event == "mark":
            # Handle mark event - checkpoint confirming audio playback completion
            mark_data = message.get("mark", {})
            mark_name = mark_data.get("name", "unknown")
            logger.info(f"VI mark event received: {mark_name}")
            # Mark events are informational, no frame to return
            return None

        elif event == "error":
            # Handle error events
            error_msg = message.get("error", "Unknown error")
            logger.error(f"VI error: {error_msg}")
            return None

        elif event == "exit":
            # Handle exit event from VI
            logger.info("VI exit event received - terminating session")
            self._call_ended = True
            return CancelFrame()

        elif event == "call_end" or event == "callEnd":
            # Handle call end event (if VI sends this)
            logger.info("VI call end event received")
            self._call_ended = True
            return CancelFrame()

        else:
            logger.debug(f"VI UNHANDLED EVENT: {event}")

        return None
@@ -97,9 +97,7 @@ class AIService(FrameProcessor):
97
97
  pass
98
98
 
99
99
  async def _update_settings(self, settings: Mapping[str, Any]):
100
- from pipecat.services.openai_realtime_beta.events import (
101
- SessionProperties,
102
- )
100
+ from pipecat.services.openai.realtime.events import SessionProperties
103
101
 
104
102
  for key, value in settings.items():
105
103
  logger.debug("Update request for:", key, value)
@@ -111,9 +109,7 @@ class AIService(FrameProcessor):
111
109
  logger.debug("Attempting to update", key, value)
112
110
 
113
111
  try:
114
- from pipecat.services.openai_realtime_beta.events import (
115
- TurnDetection,
116
- )
112
+ from pipecat.services.openai.realtime.events import TurnDetection
117
113
 
118
114
  if isinstance(self._session_properties, SessionProperties):
119
115
  current_properties = self._session_properties
@@ -42,7 +42,6 @@ from pipecat.frames.frames import (
42
42
  LLMTextFrame,
43
43
  LLMUpdateSettingsFrame,
44
44
  UserImageRawFrame,
45
- VisionImageRawFrame,
46
45
  )
47
46
  from pipecat.metrics.metrics import LLMTokenUsage
48
47
  from pipecat.processors.aggregators.llm_context import LLMContext
@@ -152,7 +151,7 @@ class AnthropicLLMService(LLMService):
152
151
  self,
153
152
  *,
154
153
  api_key: str,
155
- model: str = "claude-sonnet-4-20250514",
154
+ model: str = "claude-sonnet-4-5-20250929",
156
155
  params: Optional[InputParams] = None,
157
156
  client=None,
158
157
  retry_timeout_secs: Optional[float] = 5.0,
@@ -163,7 +162,7 @@ class AnthropicLLMService(LLMService):
163
162
 
164
163
  Args:
165
164
  api_key: Anthropic API key for authentication.
166
- model: Model name to use. Defaults to "claude-sonnet-4-20250514".
165
+ model: Model name to use. Defaults to "claude-sonnet-4-5-20250929".
167
166
  params: Optional model parameters for inference.
168
167
  client: Optional custom Anthropic client instance.
169
168
  retry_timeout_secs: Request timeout in seconds for retry logic.
@@ -495,12 +494,6 @@ class AnthropicLLMService(LLMService):
495
494
  context = frame.context
496
495
  elif isinstance(frame, LLMMessagesFrame):
497
496
  context = AnthropicLLMContext.from_messages(frame.messages)
498
- elif isinstance(frame, VisionImageRawFrame):
499
- # This is only useful in very simple pipelines because it creates
500
- # a new context. Generally we want a context manager to catch
501
- # UserImageRawFrames coming through the pipeline and add them
502
- # to the context.
503
- context = AnthropicLLMContext.from_image_frame(frame)
504
497
  elif isinstance(frame, LLMUpdateSettingsFrame):
505
498
  await self._update_settings(frame.settings)
506
499
  elif isinstance(frame, LLMEnablePromptCachingFrame):
@@ -626,22 +619,6 @@ class AnthropicLLMContext(OpenAILLMContext):
626
619
  self._restructure_from_openai_messages()
627
620
  return self
628
621
 
629
- @classmethod
630
- def from_image_frame(cls, frame: VisionImageRawFrame) -> "AnthropicLLMContext":
631
- """Create context from a vision image frame.
632
-
633
- Args:
634
- frame: The vision image frame to process.
635
-
636
- Returns:
637
- New Anthropic context with the image message.
638
- """
639
- context = cls()
640
- context.add_image_frame_message(
641
- format=frame.format, size=frame.size, image=frame.image, text=frame.text
642
- )
643
- return context
644
-
645
622
  def set_messages(self, messages: List):
646
623
  """Set the messages list and reset cache tracking.
647
624
 
@@ -108,6 +108,8 @@ class AssemblyAIConnectionParams(BaseModel):
108
108
  end_of_turn_confidence_threshold: Confidence threshold for end-of-turn detection.
109
109
  min_end_of_turn_silence_when_confident: Minimum silence duration when confident about end-of-turn.
110
110
  max_turn_silence: Maximum silence duration before forcing end-of-turn.
111
+ keyterms_prompt: List of key terms to guide transcription. Will be JSON serialized before sending.
112
+ speech_model: Select between English and multilingual models. Defaults to "universal-streaming-english".
111
113
  """
112
114
 
113
115
  sample_rate: int = 16000
@@ -117,3 +119,7 @@ class AssemblyAIConnectionParams(BaseModel):
117
119
  end_of_turn_confidence_threshold: Optional[float] = None
118
120
  min_end_of_turn_silence_when_confident: Optional[int] = None
119
121
  max_turn_silence: Optional[int] = None
122
+ keyterms_prompt: Optional[List[str]] = None
123
+ speech_model: Literal["universal-streaming-english", "universal-streaming-multilingual"] = (
124
+ "universal-streaming-english"
125
+ )
@@ -174,11 +174,16 @@ class AssemblyAISTTService(STTService):
174
174
 
175
175
  def _build_ws_url(self) -> str:
176
176
  """Build WebSocket URL with query parameters using urllib.parse.urlencode."""
177
- params = {
178
- k: str(v).lower() if isinstance(v, bool) else v
179
- for k, v in self._connection_params.model_dump().items()
180
- if v is not None
181
- }
177
+ params = {}
178
+ for k, v in self._connection_params.model_dump().items():
179
+ if v is not None:
180
+ if k == "keyterms_prompt":
181
+ params[k] = json.dumps(v)
182
+ elif isinstance(v, bool):
183
+ params[k] = str(v).lower()
184
+ else:
185
+ params[k] = v
186
+
182
187
  if params:
183
188
  query_string = urlencode(params)
184
189
  return f"{self._api_endpoint_base_url}?{query_string}"
@@ -197,6 +202,8 @@ class AssemblyAISTTService(STTService):
197
202
  )
198
203
  self._connected = True
199
204
  self._receive_task = self.create_task(self._receive_task_handler())
205
+
206
+ await self._call_event_handler("on_connected")
200
207
  except Exception as e:
201
208
  logger.error(f"Failed to connect to AssemblyAI: {e}")
202
209
  self._connected = False
@@ -238,6 +245,7 @@ class AssemblyAISTTService(STTService):
238
245
  self._websocket = None
239
246
  self._connected = False
240
247
  self._receive_task = None
248
+ await self._call_event_handler("on_disconnected")
241
249
 
242
250
  async def _receive_task_handler(self):
243
251
  """Handle incoming WebSocket messages."""
@@ -20,8 +20,8 @@ from pipecat.frames.frames import (
20
20
  EndFrame,
21
21
  ErrorFrame,
22
22
  Frame,
23
+ InterruptionFrame,
23
24
  StartFrame,
24
- StartInterruptionFrame,
25
25
  TTSAudioRawFrame,
26
26
  TTSStartedFrame,
27
27
  TTSStoppedFrame,
@@ -119,7 +119,6 @@ class AsyncAITTSService(InterruptibleTTSService):
119
119
  """
120
120
  super().__init__(
121
121
  aggregate_sentences=aggregate_sentences,
122
- push_text_frames=False,
123
122
  pause_frame_processing=True,
124
123
  push_stop_frames=True,
125
124
  sample_rate=sample_rate,
@@ -236,6 +235,8 @@ class AsyncAITTSService(InterruptibleTTSService):
236
235
  }
237
236
 
238
237
  await self._get_websocket().send(json.dumps(init_msg))
238
+
239
+ await self._call_event_handler("on_connected")
239
240
  except Exception as e:
240
241
  logger.error(f"{self} initialization error: {e}")
241
242
  self._websocket = None
@@ -253,6 +254,7 @@ class AsyncAITTSService(InterruptibleTTSService):
253
254
  finally:
254
255
  self._websocket = None
255
256
  self._started = False
257
+ await self._call_event_handler("on_disconnected")
256
258
 
257
259
  def _get_websocket(self):
258
260
  if self._websocket:
@@ -275,7 +277,7 @@ class AsyncAITTSService(InterruptibleTTSService):
275
277
  direction: The direction to push the frame.
276
278
  """
277
279
  await super().push_frame(frame, direction)
278
- if isinstance(frame, (TTSStoppedFrame, StartInterruptionFrame)):
280
+ if isinstance(frame, (TTSStoppedFrame, InterruptionFrame)):
279
281
  self._started = False
280
282
 
281
283
  async def _receive_messages(self):