dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of dv-pipecat-ai might be problematic. Click here for more details.

Files changed (195)
  1. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
  2. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
  3. pipecat/adapters/base_llm_adapter.py +38 -1
  4. pipecat/adapters/services/anthropic_adapter.py +9 -14
  5. pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
  6. pipecat/adapters/services/bedrock_adapter.py +236 -13
  7. pipecat/adapters/services/gemini_adapter.py +12 -8
  8. pipecat/adapters/services/open_ai_adapter.py +19 -7
  9. pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
  10. pipecat/audio/dtmf/dtmf-0.wav +0 -0
  11. pipecat/audio/dtmf/dtmf-1.wav +0 -0
  12. pipecat/audio/dtmf/dtmf-2.wav +0 -0
  13. pipecat/audio/dtmf/dtmf-3.wav +0 -0
  14. pipecat/audio/dtmf/dtmf-4.wav +0 -0
  15. pipecat/audio/dtmf/dtmf-5.wav +0 -0
  16. pipecat/audio/dtmf/dtmf-6.wav +0 -0
  17. pipecat/audio/dtmf/dtmf-7.wav +0 -0
  18. pipecat/audio/dtmf/dtmf-8.wav +0 -0
  19. pipecat/audio/dtmf/dtmf-9.wav +0 -0
  20. pipecat/audio/dtmf/dtmf-pound.wav +0 -0
  21. pipecat/audio/dtmf/dtmf-star.wav +0 -0
  22. pipecat/audio/filters/krisp_viva_filter.py +193 -0
  23. pipecat/audio/filters/noisereduce_filter.py +15 -0
  24. pipecat/audio/turn/base_turn_analyzer.py +9 -1
  25. pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
  26. pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
  27. pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
  28. pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
  29. pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
  30. pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
  31. pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
  32. pipecat/audio/vad/data/README.md +10 -0
  33. pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
  34. pipecat/audio/vad/silero.py +9 -3
  35. pipecat/audio/vad/vad_analyzer.py +13 -1
  36. pipecat/extensions/voicemail/voicemail_detector.py +5 -5
  37. pipecat/frames/frames.py +277 -86
  38. pipecat/observers/loggers/debug_log_observer.py +3 -3
  39. pipecat/observers/loggers/llm_log_observer.py +7 -3
  40. pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
  41. pipecat/pipeline/runner.py +18 -6
  42. pipecat/pipeline/service_switcher.py +64 -36
  43. pipecat/pipeline/task.py +125 -79
  44. pipecat/pipeline/tts_switcher.py +30 -0
  45. pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
  46. pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
  47. pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
  48. pipecat/processors/aggregators/llm_context.py +40 -2
  49. pipecat/processors/aggregators/llm_response.py +32 -15
  50. pipecat/processors/aggregators/llm_response_universal.py +19 -15
  51. pipecat/processors/aggregators/user_response.py +6 -6
  52. pipecat/processors/aggregators/vision_image_frame.py +24 -2
  53. pipecat/processors/audio/audio_buffer_processor.py +43 -8
  54. pipecat/processors/dtmf_aggregator.py +174 -77
  55. pipecat/processors/filters/stt_mute_filter.py +17 -0
  56. pipecat/processors/frame_processor.py +110 -24
  57. pipecat/processors/frameworks/langchain.py +8 -2
  58. pipecat/processors/frameworks/rtvi.py +210 -68
  59. pipecat/processors/frameworks/strands_agents.py +170 -0
  60. pipecat/processors/logger.py +2 -2
  61. pipecat/processors/transcript_processor.py +26 -5
  62. pipecat/processors/user_idle_processor.py +35 -11
  63. pipecat/runner/daily.py +59 -20
  64. pipecat/runner/run.py +395 -93
  65. pipecat/runner/types.py +6 -4
  66. pipecat/runner/utils.py +51 -10
  67. pipecat/serializers/__init__.py +5 -1
  68. pipecat/serializers/asterisk.py +16 -2
  69. pipecat/serializers/convox.py +41 -4
  70. pipecat/serializers/custom.py +257 -0
  71. pipecat/serializers/exotel.py +5 -5
  72. pipecat/serializers/livekit.py +20 -0
  73. pipecat/serializers/plivo.py +5 -5
  74. pipecat/serializers/protobuf.py +6 -5
  75. pipecat/serializers/telnyx.py +2 -2
  76. pipecat/serializers/twilio.py +43 -23
  77. pipecat/serializers/vi.py +324 -0
  78. pipecat/services/ai_service.py +2 -6
  79. pipecat/services/anthropic/llm.py +2 -25
  80. pipecat/services/assemblyai/models.py +6 -0
  81. pipecat/services/assemblyai/stt.py +13 -5
  82. pipecat/services/asyncai/tts.py +5 -3
  83. pipecat/services/aws/__init__.py +1 -0
  84. pipecat/services/aws/llm.py +147 -105
  85. pipecat/services/aws/nova_sonic/__init__.py +0 -0
  86. pipecat/services/aws/nova_sonic/context.py +436 -0
  87. pipecat/services/aws/nova_sonic/frames.py +25 -0
  88. pipecat/services/aws/nova_sonic/llm.py +1265 -0
  89. pipecat/services/aws/stt.py +3 -3
  90. pipecat/services/aws_nova_sonic/__init__.py +19 -1
  91. pipecat/services/aws_nova_sonic/aws.py +11 -1151
  92. pipecat/services/aws_nova_sonic/context.py +8 -354
  93. pipecat/services/aws_nova_sonic/frames.py +13 -17
  94. pipecat/services/azure/llm.py +51 -1
  95. pipecat/services/azure/realtime/__init__.py +0 -0
  96. pipecat/services/azure/realtime/llm.py +65 -0
  97. pipecat/services/azure/stt.py +15 -0
  98. pipecat/services/cartesia/stt.py +77 -70
  99. pipecat/services/cartesia/tts.py +80 -13
  100. pipecat/services/deepgram/__init__.py +1 -0
  101. pipecat/services/deepgram/flux/__init__.py +0 -0
  102. pipecat/services/deepgram/flux/stt.py +640 -0
  103. pipecat/services/elevenlabs/__init__.py +4 -1
  104. pipecat/services/elevenlabs/stt.py +339 -0
  105. pipecat/services/elevenlabs/tts.py +87 -46
  106. pipecat/services/fish/tts.py +5 -2
  107. pipecat/services/gemini_multimodal_live/events.py +38 -524
  108. pipecat/services/gemini_multimodal_live/file_api.py +23 -173
  109. pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
  110. pipecat/services/gladia/stt.py +56 -72
  111. pipecat/services/google/__init__.py +1 -0
  112. pipecat/services/google/gemini_live/__init__.py +3 -0
  113. pipecat/services/google/gemini_live/file_api.py +189 -0
  114. pipecat/services/google/gemini_live/llm.py +1582 -0
  115. pipecat/services/google/gemini_live/llm_vertex.py +184 -0
  116. pipecat/services/google/llm.py +15 -11
  117. pipecat/services/google/llm_openai.py +3 -3
  118. pipecat/services/google/llm_vertex.py +86 -16
  119. pipecat/services/google/stt.py +4 -0
  120. pipecat/services/google/tts.py +7 -3
  121. pipecat/services/heygen/api.py +2 -0
  122. pipecat/services/heygen/client.py +8 -4
  123. pipecat/services/heygen/video.py +2 -0
  124. pipecat/services/hume/__init__.py +5 -0
  125. pipecat/services/hume/tts.py +220 -0
  126. pipecat/services/inworld/tts.py +6 -6
  127. pipecat/services/llm_service.py +15 -5
  128. pipecat/services/lmnt/tts.py +4 -2
  129. pipecat/services/mcp_service.py +4 -2
  130. pipecat/services/mem0/memory.py +6 -5
  131. pipecat/services/mistral/llm.py +29 -8
  132. pipecat/services/moondream/vision.py +42 -16
  133. pipecat/services/neuphonic/tts.py +5 -2
  134. pipecat/services/openai/__init__.py +1 -0
  135. pipecat/services/openai/base_llm.py +27 -20
  136. pipecat/services/openai/realtime/__init__.py +0 -0
  137. pipecat/services/openai/realtime/context.py +272 -0
  138. pipecat/services/openai/realtime/events.py +1106 -0
  139. pipecat/services/openai/realtime/frames.py +37 -0
  140. pipecat/services/openai/realtime/llm.py +829 -0
  141. pipecat/services/openai/tts.py +49 -10
  142. pipecat/services/openai_realtime/__init__.py +27 -0
  143. pipecat/services/openai_realtime/azure.py +21 -0
  144. pipecat/services/openai_realtime/context.py +21 -0
  145. pipecat/services/openai_realtime/events.py +21 -0
  146. pipecat/services/openai_realtime/frames.py +21 -0
  147. pipecat/services/openai_realtime_beta/azure.py +16 -0
  148. pipecat/services/openai_realtime_beta/openai.py +17 -5
  149. pipecat/services/piper/tts.py +7 -9
  150. pipecat/services/playht/tts.py +34 -4
  151. pipecat/services/rime/tts.py +12 -12
  152. pipecat/services/riva/stt.py +3 -1
  153. pipecat/services/salesforce/__init__.py +9 -0
  154. pipecat/services/salesforce/llm.py +700 -0
  155. pipecat/services/sarvam/__init__.py +7 -0
  156. pipecat/services/sarvam/stt.py +540 -0
  157. pipecat/services/sarvam/tts.py +97 -13
  158. pipecat/services/simli/video.py +2 -2
  159. pipecat/services/speechmatics/stt.py +22 -10
  160. pipecat/services/stt_service.py +47 -0
  161. pipecat/services/tavus/video.py +2 -2
  162. pipecat/services/tts_service.py +75 -22
  163. pipecat/services/vision_service.py +7 -6
  164. pipecat/services/vistaar/llm.py +51 -9
  165. pipecat/tests/utils.py +4 -4
  166. pipecat/transcriptions/language.py +41 -1
  167. pipecat/transports/base_input.py +13 -34
  168. pipecat/transports/base_output.py +140 -104
  169. pipecat/transports/daily/transport.py +199 -26
  170. pipecat/transports/heygen/__init__.py +0 -0
  171. pipecat/transports/heygen/transport.py +381 -0
  172. pipecat/transports/livekit/transport.py +228 -63
  173. pipecat/transports/local/audio.py +6 -1
  174. pipecat/transports/local/tk.py +11 -2
  175. pipecat/transports/network/fastapi_websocket.py +1 -1
  176. pipecat/transports/smallwebrtc/connection.py +103 -19
  177. pipecat/transports/smallwebrtc/request_handler.py +246 -0
  178. pipecat/transports/smallwebrtc/transport.py +65 -23
  179. pipecat/transports/tavus/transport.py +23 -12
  180. pipecat/transports/websocket/client.py +41 -5
  181. pipecat/transports/websocket/fastapi.py +21 -11
  182. pipecat/transports/websocket/server.py +14 -7
  183. pipecat/transports/whatsapp/api.py +8 -0
  184. pipecat/transports/whatsapp/client.py +47 -0
  185. pipecat/utils/base_object.py +54 -22
  186. pipecat/utils/redis.py +58 -0
  187. pipecat/utils/string.py +13 -1
  188. pipecat/utils/tracing/service_decorators.py +21 -21
  189. pipecat/serializers/genesys.py +0 -95
  190. pipecat/services/google/test-google-chirp.py +0 -45
  191. pipecat/services/openai.py +0 -698
  192. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
  193. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
  194. {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
  195. /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/runner/utils.py CHANGED
@@ -99,16 +99,47 @@ async def parse_telephony_websocket(websocket: WebSocket):
99
99
  tuple: (transport_type: str, call_data: dict)
100
100
 
101
101
  call_data contains provider-specific fields:
102
- - Twilio: {"stream_id": str, "call_id": str}
103
- - Telnyx: {"stream_id": str, "call_control_id": str, "outbound_encoding": str}
104
- - Plivo: {"stream_id": str, "call_id": str}
105
- - Exotel: {"stream_id": str, "call_id": str, "account_sid": str}
102
+
103
+ - Twilio::
104
+
105
+ {
106
+ "stream_id": str,
107
+ "call_id": str,
108
+ "body": dict
109
+ }
110
+
111
+ - Telnyx::
112
+
113
+ {
114
+ "stream_id": str,
115
+ "call_control_id": str,
116
+ "outbound_encoding": str,
117
+ "from": str,
118
+ "to": str,
119
+ }
120
+
121
+ - Plivo::
122
+
123
+ {
124
+ "stream_id": str,
125
+ "call_id": str,
126
+ }
127
+
128
+ - Exotel::
129
+
130
+ {
131
+ "stream_id": str,
132
+ "call_id": str,
133
+ "account_sid": str,
134
+ "from": str,
135
+ "to": str,
136
+ }
106
137
 
107
138
  Example usage::
108
139
 
109
140
  transport_type, call_data = await parse_telephony_websocket(websocket)
110
- if transport_type == "telnyx":
111
- outbound_encoding = call_data["outbound_encoding"]
141
+ if transport_type == "twilio":
142
+ user_id = call_data["body"]["user_id"]
112
143
  """
113
144
  # Read first two messages
114
145
  start_data = websocket.iter_text()
@@ -151,9 +182,12 @@ async def parse_telephony_websocket(websocket: WebSocket):
151
182
  # Extract provider-specific data
152
183
  if transport_type == "twilio":
153
184
  start_data = call_data_raw.get("start", {})
185
+ body_data = start_data.get("customParameters", {})
154
186
  call_data = {
155
187
  "stream_id": start_data.get("streamSid"),
156
188
  "call_id": start_data.get("callSid"),
189
+ # All custom parameters
190
+ "body": body_data,
157
191
  }
158
192
 
159
193
  elif transport_type == "telnyx":
@@ -163,6 +197,8 @@ async def parse_telephony_websocket(websocket: WebSocket):
163
197
  "outbound_encoding": call_data_raw.get("start", {})
164
198
  .get("media_format", {})
165
199
  .get("encoding"),
200
+ "from": call_data_raw.get("start", {}).get("from", ""),
201
+ "to": call_data_raw.get("start", {}).get("to", ""),
166
202
  }
167
203
 
168
204
  elif transport_type == "plivo":
@@ -178,6 +214,8 @@ async def parse_telephony_websocket(websocket: WebSocket):
178
214
  "stream_id": start_data.get("stream_sid"),
179
215
  "call_id": start_data.get("call_sid"),
180
216
  "account_sid": start_data.get("account_sid"),
217
+ "from": start_data.get("from", ""),
218
+ "to": start_data.get("to", ""),
181
219
  }
182
220
 
183
221
  else:
@@ -275,6 +313,7 @@ def _smallwebrtc_sdp_cleanup_ice_candidates(text: str, pattern: str) -> str:
275
313
  Returns:
276
314
  Cleaned SDP text with filtered ICE candidates.
277
315
  """
316
+ logger.debug("Removing unsupported ICE candidates from SDP")
278
317
  result = []
279
318
  lines = text.splitlines()
280
319
  for line in lines:
@@ -283,7 +322,7 @@ def _smallwebrtc_sdp_cleanup_ice_candidates(text: str, pattern: str) -> str:
283
322
  result.append(line)
284
323
  else:
285
324
  result.append(line)
286
- return "\r\n".join(result)
325
+ return "\r\n".join(result) + "\r\n"
287
326
 
288
327
 
289
328
  def _smallwebrtc_sdp_cleanup_fingerprints(text: str) -> str:
@@ -295,15 +334,16 @@ def _smallwebrtc_sdp_cleanup_fingerprints(text: str) -> str:
295
334
  Returns:
296
335
  SDP text with sha-384 and sha-512 fingerprints removed.
297
336
  """
337
+ logger.debug("Removing unsupported fingerprints from SDP")
298
338
  result = []
299
339
  lines = text.splitlines()
300
340
  for line in lines:
301
341
  if not re.search("sha-384", line) and not re.search("sha-512", line):
302
342
  result.append(line)
303
- return "\r\n".join(result)
343
+ return "\r\n".join(result) + "\r\n"
304
344
 
305
345
 
306
- def smallwebrtc_sdp_munging(sdp: str, host: str) -> str:
346
+ def smallwebrtc_sdp_munging(sdp: str, host: Optional[str]) -> str:
307
347
  """Apply SDP modifications for SmallWebRTC compatibility.
308
348
 
309
349
  Args:
@@ -314,7 +354,8 @@ def smallwebrtc_sdp_munging(sdp: str, host: str) -> str:
314
354
  Modified SDP string with fingerprint and ICE candidate cleanup.
315
355
  """
316
356
  sdp = _smallwebrtc_sdp_cleanup_fingerprints(sdp)
317
- sdp = _smallwebrtc_sdp_cleanup_ice_candidates(sdp, host)
357
+ if host:
358
+ sdp = _smallwebrtc_sdp_cleanup_ice_candidates(sdp, host)
318
359
  return sdp
319
360
 
320
361
 
pipecat/serializers/__init__.py CHANGED
@@ -1,18 +1,22 @@
1
1
  from .base_serializer import FrameSerializer, FrameSerializerType
2
2
  from .convox import ConVoxFrameSerializer
3
+ from .custom import CustomFrameSerializer
3
4
  from .exotel import ExotelFrameSerializer
4
5
  from .plivo import PlivoFrameSerializer
5
6
  from .telnyx import TelnyxFrameSerializer
6
7
  from .twilio import TwilioFrameSerializer
8
+ from .vi import VIFrameSerializer
7
9
 
8
10
  __all__ = [
9
11
  "FrameSerializer",
10
- "FrameSerializerType",
12
+ "FrameSerializerType",
11
13
  "ConVoxFrameSerializer",
14
+ "CustomFrameSerializer",
12
15
  "ExotelFrameSerializer",
13
16
  "PlivoFrameSerializer",
14
17
  "TelnyxFrameSerializer",
15
18
  "TwilioFrameSerializer",
19
+ "VIFrameSerializer",
16
20
  ]
17
21
 
18
22
  # Optional imports
pipecat/serializers/asterisk.py CHANGED
@@ -1,4 +1,6 @@
1
1
  # asterisk_ws_serializer.py
2
+ """Frame serializer for Asterisk WebSocket communication."""
3
+
2
4
  import base64
3
5
  import json
4
6
  from typing import Literal, Optional
@@ -12,8 +14,8 @@ from pipecat.frames.frames import (
12
14
  EndFrame,
13
15
  Frame,
14
16
  InputAudioRawFrame,
17
+ InterruptionFrame,
15
18
  StartFrame,
16
- StartInterruptionFrame,
17
19
  TransportMessageFrame,
18
20
  TransportMessageUrgentFrame,
19
21
  )
@@ -21,6 +23,8 @@ from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializer
21
23
 
22
24
 
23
25
  class AsteriskFrameSerializer(FrameSerializer):
26
+ """Serializes Pipecat frames to/from Asterisk WebSocket JSON messages."""
27
+
24
28
  class InputParams(BaseModel):
25
29
  """Configuration parameters for AsteriskFrameSerializer.
26
30
 
@@ -39,6 +43,12 @@ class AsteriskFrameSerializer(FrameSerializer):
39
43
  auto_hang_up: bool = False # no-op here; adapter handles hangup
40
44
 
41
45
  def __init__(self, stream_id: str, params: Optional[InputParams] = None):
46
+ """Initialize the Asterisk frame serializer.
47
+
48
+ Args:
49
+ stream_id: Unique identifier for the media stream.
50
+ params: Configuration parameters for the serializer.
51
+ """
42
52
  self._stream_id = stream_id
43
53
  self._params = params or AsteriskFrameSerializer.InputParams()
44
54
  self._tel_rate = self._params.telephony_sample_rate
@@ -49,13 +59,16 @@ class AsteriskFrameSerializer(FrameSerializer):
49
59
 
50
60
  @property
51
61
  def type(self) -> FrameSerializerType:
62
+ """Return the serializer type (TEXT for JSON messages)."""
52
63
  return FrameSerializerType.TEXT # we send/recv JSON strings
53
64
 
54
65
  async def setup(self, frame: StartFrame):
66
+ """Setup the serializer with audio parameters from the StartFrame."""
55
67
  self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
56
68
 
57
69
  # Pipecat -> Adapter (play to caller)
58
70
  async def serialize(self, frame: Frame) -> str | bytes | None:
71
+ """Serialize Pipecat frames to Asterisk WebSocket JSON messages."""
59
72
  # On pipeline end, ask bridge to hang up
60
73
  if (
61
74
  self._params.auto_hang_up
@@ -64,7 +77,7 @@ class AsteriskFrameSerializer(FrameSerializer):
64
77
  ):
65
78
  self._hangup_sent = True
66
79
  return json.dumps({"event": "hangup"})
67
- if isinstance(frame, StartInterruptionFrame):
80
+ if isinstance(frame, InterruptionFrame):
68
81
  return json.dumps({"event": "clear", "streamId": self._stream_id})
69
82
  if isinstance(frame, AudioRawFrame):
70
83
  pcm = frame.audio
@@ -114,6 +127,7 @@ class AsteriskFrameSerializer(FrameSerializer):
114
127
 
115
128
  # Adapter -> Pipecat (audio from caller)
116
129
  async def deserialize(self, data: str | bytes) -> Frame | None:
130
+ """Deserialize Asterisk WebSocket JSON messages to Pipecat frames."""
117
131
  try:
118
132
  msg = json.loads(data)
119
133
  except Exception:
pipecat/serializers/convox.py CHANGED
@@ -4,9 +4,11 @@
4
4
  # SPDX-License-Identifier: BSD 2-Clause License
5
5
  #
6
6
 
7
+ """ConVox WebSocket frame serializer for audio streaming and call management."""
8
+
7
9
  import base64
8
- import datetime
9
10
  import json
11
+ from datetime import datetime, timezone
10
12
  from typing import Optional
11
13
 
12
14
  from loguru import logger
@@ -20,9 +22,9 @@ from pipecat.frames.frames import (
20
22
  Frame,
21
23
  InputAudioRawFrame,
22
24
  InputDTMFFrame,
25
+ InterruptionFrame,
23
26
  KeypadEntry,
24
27
  StartFrame,
25
- StartInterruptionFrame,
26
28
  TransportMessageFrame,
27
29
  TransportMessageUrgentFrame,
28
30
  )
@@ -99,6 +101,7 @@ class ConVoxFrameSerializer(FrameSerializer):
99
101
  """Serializes a Pipecat frame to ConVox WebSocket format.
100
102
 
101
103
  Handles conversion of various frame types to ConVox WebSocket messages.
104
+ For EndFrames, initiates call termination if auto_hang_up is enabled.
102
105
 
103
106
  Args:
104
107
  frame: The Pipecat frame to serialize.
@@ -106,7 +109,15 @@ class ConVoxFrameSerializer(FrameSerializer):
106
109
  Returns:
107
110
  Serialized data as JSON string, or None if the frame isn't handled.
108
111
  """
109
- if isinstance(frame, StartInterruptionFrame):
112
+ if (
113
+ self._params.auto_hang_up
114
+ and not self._call_ended
115
+ and isinstance(frame, (EndFrame, CancelFrame))
116
+ ):
117
+ self._call_ended = True
118
+ # Return the callEnd event to be sent via the WebSocket
119
+ return await self._send_call_end_event()
120
+ elif isinstance(frame, InterruptionFrame):
110
121
  # Clear/interrupt command for ConVox
111
122
  message = {
112
123
  "event": "clear",
@@ -138,7 +149,7 @@ class ConVoxFrameSerializer(FrameSerializer):
138
149
  payload = base64.b64encode(serialized_data).decode("ascii")
139
150
 
140
151
  # ConVox expects play_audio event format according to the documentation
141
- timestamp = datetime.datetime.now().isoformat()
152
+ timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
142
153
 
143
154
  message = {
144
155
  "event": "play_audio",
@@ -164,6 +175,32 @@ class ConVoxFrameSerializer(FrameSerializer):
164
175
 
165
176
  return None
166
177
 
178
+ async def _send_call_end_event(self):
179
+ """Send a callEnd event to ConVox to terminate the call.
180
+
181
+ This method is called when auto_hang_up is enabled and an EndFrame or
182
+ CancelFrame is received, similar to the logic in end_call_handler.py.
183
+ """
184
+ try:
185
+ call_end_event = {
186
+ "event": "callEnd",
187
+ "details": {
188
+ "timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
189
+ "direction": "WSS",
190
+ "message": "Event trigger request",
191
+ },
192
+ }
193
+
194
+ logger.info(
195
+ f"ConVox auto_hang_up: Sending callEnd event for stream_id: {self._stream_id}, call_id: {self._call_id}"
196
+ )
197
+ # Note: The actual sending will be handled by the transport layer
198
+ # when this method returns the JSON string
199
+ return json.dumps(call_end_event)
200
+ except Exception as e:
201
+ logger.error(f"ConVox auto_hang_up: Failed to create callEnd event: {e}")
202
+ return None
203
+
167
204
  async def deserialize(self, data: str | bytes) -> Frame | None:
168
205
  """Deserializes ConVox WebSocket data to Pipecat frames.
169
206
 
pipecat/serializers/custom.py ADDED
@@ -0,0 +1,257 @@
1
+ #
2
+ # Copyright (c) 2024–2025, Daily
3
+ #
4
+ # SPDX-License-Identifier: BSD 2-Clause License
5
+ #
6
+
7
+ """Custom/External telephony serializer for Pipecat with Ringg AI WebSocket API. Customers will directly connect to Ringg AI WebSocket API."""
8
+
9
+ import base64
10
+ import json
11
+ import uuid
12
+ from typing import Optional
13
+
14
+ from loguru import logger
15
+ from pydantic import BaseModel
16
+
17
+ from pipecat.audio.utils import (
18
+ alaw_to_pcm,
19
+ create_stream_resampler,
20
+ pcm_to_alaw,
21
+ pcm_to_ulaw,
22
+ ulaw_to_pcm,
23
+ )
24
+ from pipecat.frames.frames import (
25
+ AudioRawFrame,
26
+ CallTransferFrame,
27
+ CancelFrame,
28
+ EndFrame,
29
+ Frame,
30
+ InputAudioRawFrame,
31
+ InterruptionFrame,
32
+ StartFrame,
33
+ TransportMessageFrame,
34
+ TransportMessageUrgentFrame,
35
+ )
36
+ from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
37
+
38
+
39
+ class CustomFrameSerializer(FrameSerializer):
40
+ """Serializer for Custom/External telephony WebSocket protocol (Ringg AI API).
41
+
42
+ This serializer handles converting between Pipecat frames and the Ringg AI
43
+ WebSocket protocol for external/custom telephony providers. It supports
44
+ PCMU (μ-law), PCMA (A-law), and PCM codecs with automatic conversion.
45
+
46
+ Supported events:
47
+ - start: Initialize call with agent configuration
48
+ - media: Bidirectional audio streaming
49
+ - clear: Clear audio buffers (interruption)
50
+ - call_transfer: Transfer call to another number
51
+ - hang_up: End call notification
52
+
53
+ Audio format:
54
+ - Sample Rate: Configurable (default 8kHz)
55
+ - Channels: Mono (1 channel)
56
+ - Bit Depth: 16-bit
57
+ - Encoding: Little-endian
58
+ - Payload Encoding: Base64
59
+ - Supported Codecs: PCMU (μ-law), PCMA (A-law), PCM (raw)
60
+ """
61
+
62
+ class InputParams(BaseModel):
63
+ """Configuration parameters for CustomFrameSerializer.
64
+
65
+ Parameters:
66
+ custom_sample_rate: Sample rate used by external client, defaults to 8000 Hz.
67
+ sample_rate: Optional override for pipeline input sample rate.
68
+ codec: Audio codec - "pcmu" (μ-law), "pcma" (A-law), or "pcm" (raw PCM).
69
+ """
70
+
71
+ custom_sample_rate: int = 8000
72
+ sample_rate: Optional[int] = None
73
+ codec: str = "pcmu" # "pcmu", "pcma", or "pcm"
74
+
75
+ def __init__(
76
+ self, stream_sid: str, call_sid: Optional[str] = None, params: Optional[InputParams] = None
77
+ ):
78
+ """Initialize the CustomFrameSerializer.
79
+
80
+ Args:
81
+ stream_sid: The stream identifier from external client.
82
+ call_sid: The call identifier from external client.
83
+ params: Configuration parameters.
84
+ """
85
+ self._stream_sid = stream_sid
86
+ self._call_sid = call_sid
87
+ self._params = params or CustomFrameSerializer.InputParams()
88
+
89
+ self._custom_sample_rate = self._params.custom_sample_rate
90
+ self._sample_rate = 0 # Pipeline input rate
91
+ self._codec = self._params.codec.lower()
92
+
93
+ self._input_resampler = create_stream_resampler()
94
+ self._output_resampler = create_stream_resampler()
95
+
96
+ @property
97
+ def type(self) -> FrameSerializerType:
98
+ """Gets the serializer type.
99
+
100
+ Returns:
101
+ The serializer type, TEXT for JSON-based protocol.
102
+ """
103
+ return FrameSerializerType.TEXT
104
+
105
+ async def setup(self, frame: StartFrame):
106
+ """Sets up the serializer with pipeline configuration.
107
+
108
+ Args:
109
+ frame: The StartFrame containing pipeline configuration.
110
+ """
111
+ self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
112
+
113
+ async def serialize(self, frame: Frame) -> str | bytes | None:
114
+ """Serializes a Pipecat frame to Custom telephony WebSocket format.
115
+
116
+ Handles conversion of various frame types to Ringg AI WebSocket messages.
117
+
118
+ Args:
119
+ frame: The Pipecat frame to serialize.
120
+
121
+ Returns:
122
+ Serialized data as JSON string, or None if the frame isn't handled.
123
+ """
124
+ if isinstance(frame, InterruptionFrame):
125
+ # Send clear event to instruct client to discard buffered audio
126
+ answer = {"event": "clear", "stream_sid": self._stream_sid}
127
+ return json.dumps(answer)
128
+
129
+ elif isinstance(frame, CallTransferFrame):
130
+ # Send call_transfer event to transfer the call to another number
131
+ answer = {
132
+ "event": "call_transfer",
133
+ "call_sid": self._call_sid or self._stream_sid,
134
+ "to": frame.target,
135
+ }
136
+ return json.dumps(answer)
137
+
138
+ elif isinstance(frame, (EndFrame, CancelFrame)):
139
+ # Send hang_up event to end the call
140
+ answer = {"event": "hang_up", "stream_sid": self._stream_sid}
141
+ return json.dumps(answer)
142
+
143
+ elif isinstance(frame, AudioRawFrame):
144
+ data = frame.audio
145
+
146
+ # Convert audio based on codec
147
+ if self._codec == "pcmu":
148
+ # Convert PCM to μ-law for PCMU codec
149
+ serialized_data = await pcm_to_ulaw(
150
+ data, frame.sample_rate, self._custom_sample_rate, self._output_resampler
151
+ )
152
+ elif self._codec == "pcma":
153
+ # Convert PCM to A-law for PCMA codec
154
+ serialized_data = await pcm_to_alaw(
155
+ data, frame.sample_rate, self._custom_sample_rate, self._output_resampler
156
+ )
157
+ else: # pcm
158
+ # Resample PCM to target sample rate
159
+ serialized_data = await self._output_resampler.resample(
160
+ data, frame.sample_rate, self._custom_sample_rate
161
+ )
162
+
163
+ if serialized_data is None or len(serialized_data) == 0:
164
+ # Skip if no audio data
165
+ return None
166
+
167
+ payload = base64.b64encode(serialized_data).decode("ascii")
168
+ answer = {
169
+ "event": "media",
170
+ "stream_sid": self._stream_sid,
171
+ "media": {"payload": payload},
172
+ }
173
+
174
+ return json.dumps(answer)
175
+
176
+ elif isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
177
+ return json.dumps(frame.message)
178
+
179
+ return None
180
+
181
+ async def deserialize(self, data: str | bytes) -> Frame | None:
182
+ """Deserializes Custom telephony WebSocket data to Pipecat frames.
183
+
184
+ Handles conversion of Ringg AI WebSocket events to appropriate Pipecat frames.
185
+
186
+ Args:
187
+ data: The raw WebSocket data from external client.
188
+
189
+ Returns:
190
+ A Pipecat frame corresponding to the event, or None if unhandled.
191
+ """
192
+ try:
193
+ message = json.loads(data)
194
+ except json.JSONDecodeError as e:
195
+ logger.error(f"Failed to parse JSON message: {e}")
196
+ return None
197
+
198
+ event = message.get("event")
199
+
200
+ if event == "media":
201
+ media = message.get("media", {})
202
+ payload_base64 = media.get("payload")
203
+ uuid = message.get("uuid")
204
+
205
+ if not payload_base64:
206
+ logger.warning("Media event missing payload")
207
+ return None
208
+
209
+ try:
210
+ payload = base64.b64decode(payload_base64)
211
+ except Exception as e:
212
+ logger.error(f"Failed to decode base64 payload: {e}")
213
+ return None
214
+
215
+ # Convert audio based on codec
216
+ if self._codec == "pcmu":
217
+ # Convert μ-law to PCM
218
+ deserialized_data = await ulaw_to_pcm(
219
+ payload, self._custom_sample_rate, self._sample_rate, self._input_resampler
220
+ )
221
+ elif self._codec == "pcma":
222
+ # Convert A-law to PCM
223
+ deserialized_data = await alaw_to_pcm(
224
+ payload, self._custom_sample_rate, self._sample_rate, self._input_resampler
225
+ )
226
+ else: # pcm
227
+ # Resample PCM to pipeline sample rate
228
+ deserialized_data = await self._input_resampler.resample(
229
+ payload,
230
+ self._custom_sample_rate,
231
+ self._sample_rate,
232
+ )
233
+
234
+ if deserialized_data is None or len(deserialized_data) == 0:
235
+ # Skip if no audio data
236
+ return None
237
+
238
+ audio_frame = InputAudioRawFrame(
239
+ audio=deserialized_data,
240
+ num_channels=1, # Mono audio
241
+ sample_rate=self._sample_rate,
242
+ )
243
+ return audio_frame
244
+
245
+ elif event == "start":
246
+ # Log start event but don't generate a frame (handled by WebSocketService)
247
+ logger.debug(f"Received start event for stream {self._stream_sid}")
248
+ return None
249
+
250
+ elif event == "clear":
251
+ # External client requesting to clear our audio buffers
252
+ logger.debug(f"Received clear event for stream {self._stream_sid}")
253
+ return None
254
+
255
+ else:
256
+ logger.debug(f"Unhandled event type: {event} for stream {self._stream_sid}")
257
+ return None
pipecat/serializers/exotel.py CHANGED
@@ -20,10 +20,10 @@ from pipecat.frames.frames import (
20
20
  Frame,
21
21
  InputAudioRawFrame,
22
22
  InputDTMFFrame,
23
+ InterruptionFrame,
24
+ OutputTransportMessageFrame,
25
+ OutputTransportMessageUrgentFrame,
23
26
  StartFrame,
24
- StartInterruptionFrame,
25
- TransportMessageFrame,
26
- TransportMessageUrgentFrame,
27
27
  )
28
28
  from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
29
29
 
@@ -98,7 +98,7 @@ class ExotelFrameSerializer(FrameSerializer):
98
98
  Returns:
99
99
  Serialized data as string or bytes, or None if the frame isn't handled.
100
100
  """
101
- if isinstance(frame, StartInterruptionFrame):
101
+ if isinstance(frame, InterruptionFrame):
102
102
  answer = {"event": "clear", "streamSid": self._stream_sid}
103
103
  return json.dumps(answer)
104
104
  elif isinstance(frame, AudioRawFrame):
@@ -121,7 +121,7 @@ class ExotelFrameSerializer(FrameSerializer):
121
121
  }
122
122
 
123
123
  return json.dumps(answer)
124
- elif isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
124
+ elif isinstance(frame, (OutputTransportMessageFrame, OutputTransportMessageUrgentFrame)):
125
125
  return json.dumps(frame.message)
126
126
 
127
127
  return None
pipecat/serializers/livekit.py CHANGED
@@ -25,11 +25,31 @@ except ModuleNotFoundError as e:
25
25
  class LivekitFrameSerializer(FrameSerializer):
26
26
  """Serializer for converting between Pipecat frames and LiveKit audio frames.
27
27
 
28
+ .. deprecated:: 0.0.90
29
+
30
+ This class is deprecated and will be removed in a future version.
31
+ Please use LiveKitTransport instead, which handles audio streaming
32
+ and frame conversion natively.
33
+
28
34
  This serializer handles the conversion of Pipecat's OutputAudioRawFrame objects
29
35
  to LiveKit AudioFrame objects for transmission, and the reverse conversion
30
36
  for received audio data.
31
37
  """
32
38
 
39
+ def __init__(self):
40
+ """Initialize the LiveKit frame serializer."""
41
+ super().__init__()
42
+ import warnings
43
+
44
+ with warnings.catch_warnings():
45
+ warnings.simplefilter("always")
46
+ warnings.warn(
47
+ "LivekitFrameSerializer is deprecated and will be removed in a future version. "
48
+ "Please use LiveKitTransport instead, which handles audio streaming natively.",
49
+ DeprecationWarning,
50
+ stacklevel=2,
51
+ )
52
+
33
53
  @property
34
54
  def type(self) -> FrameSerializerType:
35
55
  """Get the serializer type.
pipecat/serializers/plivo.py CHANGED
@@ -22,10 +22,10 @@ from pipecat.frames.frames import (
22
22
  Frame,
23
23
  InputAudioRawFrame,
24
24
  InputDTMFFrame,
25
+ InterruptionFrame,
26
+ OutputTransportMessageFrame,
27
+ OutputTransportMessageUrgentFrame,
25
28
  StartFrame,
26
- StartInterruptionFrame,
27
- TransportMessageFrame,
28
- TransportMessageUrgentFrame,
29
29
  )
30
30
  from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
31
31
 
@@ -122,7 +122,7 @@ class PlivoFrameSerializer(FrameSerializer):
122
122
  self._hangup_attempted = True
123
123
  await self._hang_up_call()
124
124
  return None
125
- elif isinstance(frame, StartInterruptionFrame):
125
+ elif isinstance(frame, InterruptionFrame):
126
126
  answer = {"event": "clearAudio", "streamId": self._stream_id}
127
127
  return json.dumps(answer)
128
128
  elif isinstance(frame, AudioRawFrame):
@@ -148,7 +148,7 @@ class PlivoFrameSerializer(FrameSerializer):
148
148
  }
149
149
 
150
150
  return json.dumps(answer)
151
- elif isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
151
+ elif isinstance(frame, (OutputTransportMessageFrame, OutputTransportMessageUrgentFrame)):
152
152
  return json.dumps(frame.message)
153
153
 
154
154
  # Return None for unhandled frames