dv-pipecat-ai 0.0.82.dev857__py3-none-any.whl → 0.0.85.dev837__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of dv-pipecat-ai might be problematic. Click here for more details.
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/METADATA +98 -130
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/RECORD +192 -140
- pipecat/adapters/base_llm_adapter.py +38 -1
- pipecat/adapters/services/anthropic_adapter.py +9 -14
- pipecat/adapters/services/aws_nova_sonic_adapter.py +120 -5
- pipecat/adapters/services/bedrock_adapter.py +236 -13
- pipecat/adapters/services/gemini_adapter.py +12 -8
- pipecat/adapters/services/open_ai_adapter.py +19 -7
- pipecat/adapters/services/open_ai_realtime_adapter.py +5 -0
- pipecat/audio/dtmf/dtmf-0.wav +0 -0
- pipecat/audio/dtmf/dtmf-1.wav +0 -0
- pipecat/audio/dtmf/dtmf-2.wav +0 -0
- pipecat/audio/dtmf/dtmf-3.wav +0 -0
- pipecat/audio/dtmf/dtmf-4.wav +0 -0
- pipecat/audio/dtmf/dtmf-5.wav +0 -0
- pipecat/audio/dtmf/dtmf-6.wav +0 -0
- pipecat/audio/dtmf/dtmf-7.wav +0 -0
- pipecat/audio/dtmf/dtmf-8.wav +0 -0
- pipecat/audio/dtmf/dtmf-9.wav +0 -0
- pipecat/audio/dtmf/dtmf-pound.wav +0 -0
- pipecat/audio/dtmf/dtmf-star.wav +0 -0
- pipecat/audio/filters/krisp_viva_filter.py +193 -0
- pipecat/audio/filters/noisereduce_filter.py +15 -0
- pipecat/audio/turn/base_turn_analyzer.py +9 -1
- pipecat/audio/turn/smart_turn/base_smart_turn.py +14 -8
- pipecat/audio/turn/smart_turn/data/__init__.py +0 -0
- pipecat/audio/turn/smart_turn/data/smart-turn-v3.0.onnx +0 -0
- pipecat/audio/turn/smart_turn/http_smart_turn.py +6 -2
- pipecat/audio/turn/smart_turn/local_smart_turn.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v2.py +1 -1
- pipecat/audio/turn/smart_turn/local_smart_turn_v3.py +124 -0
- pipecat/audio/vad/data/README.md +10 -0
- pipecat/audio/vad/data/silero_vad_v2.onnx +0 -0
- pipecat/audio/vad/silero.py +9 -3
- pipecat/audio/vad/vad_analyzer.py +13 -1
- pipecat/extensions/voicemail/voicemail_detector.py +5 -5
- pipecat/frames/frames.py +277 -86
- pipecat/observers/loggers/debug_log_observer.py +3 -3
- pipecat/observers/loggers/llm_log_observer.py +7 -3
- pipecat/observers/loggers/user_bot_latency_log_observer.py +22 -10
- pipecat/pipeline/runner.py +18 -6
- pipecat/pipeline/service_switcher.py +64 -36
- pipecat/pipeline/task.py +125 -79
- pipecat/pipeline/tts_switcher.py +30 -0
- pipecat/processors/aggregators/dtmf_aggregator.py +2 -3
- pipecat/processors/aggregators/{gated_openai_llm_context.py → gated_llm_context.py} +9 -9
- pipecat/processors/aggregators/gated_open_ai_llm_context.py +12 -0
- pipecat/processors/aggregators/llm_context.py +40 -2
- pipecat/processors/aggregators/llm_response.py +32 -15
- pipecat/processors/aggregators/llm_response_universal.py +19 -15
- pipecat/processors/aggregators/user_response.py +6 -6
- pipecat/processors/aggregators/vision_image_frame.py +24 -2
- pipecat/processors/audio/audio_buffer_processor.py +43 -8
- pipecat/processors/dtmf_aggregator.py +174 -77
- pipecat/processors/filters/stt_mute_filter.py +17 -0
- pipecat/processors/frame_processor.py +110 -24
- pipecat/processors/frameworks/langchain.py +8 -2
- pipecat/processors/frameworks/rtvi.py +210 -68
- pipecat/processors/frameworks/strands_agents.py +170 -0
- pipecat/processors/logger.py +2 -2
- pipecat/processors/transcript_processor.py +26 -5
- pipecat/processors/user_idle_processor.py +35 -11
- pipecat/runner/daily.py +59 -20
- pipecat/runner/run.py +395 -93
- pipecat/runner/types.py +6 -4
- pipecat/runner/utils.py +51 -10
- pipecat/serializers/__init__.py +5 -1
- pipecat/serializers/asterisk.py +16 -2
- pipecat/serializers/convox.py +41 -4
- pipecat/serializers/custom.py +257 -0
- pipecat/serializers/exotel.py +5 -5
- pipecat/serializers/livekit.py +20 -0
- pipecat/serializers/plivo.py +5 -5
- pipecat/serializers/protobuf.py +6 -5
- pipecat/serializers/telnyx.py +2 -2
- pipecat/serializers/twilio.py +43 -23
- pipecat/serializers/vi.py +324 -0
- pipecat/services/ai_service.py +2 -6
- pipecat/services/anthropic/llm.py +2 -25
- pipecat/services/assemblyai/models.py +6 -0
- pipecat/services/assemblyai/stt.py +13 -5
- pipecat/services/asyncai/tts.py +5 -3
- pipecat/services/aws/__init__.py +1 -0
- pipecat/services/aws/llm.py +147 -105
- pipecat/services/aws/nova_sonic/__init__.py +0 -0
- pipecat/services/aws/nova_sonic/context.py +436 -0
- pipecat/services/aws/nova_sonic/frames.py +25 -0
- pipecat/services/aws/nova_sonic/llm.py +1265 -0
- pipecat/services/aws/stt.py +3 -3
- pipecat/services/aws_nova_sonic/__init__.py +19 -1
- pipecat/services/aws_nova_sonic/aws.py +11 -1151
- pipecat/services/aws_nova_sonic/context.py +8 -354
- pipecat/services/aws_nova_sonic/frames.py +13 -17
- pipecat/services/azure/llm.py +51 -1
- pipecat/services/azure/realtime/__init__.py +0 -0
- pipecat/services/azure/realtime/llm.py +65 -0
- pipecat/services/azure/stt.py +15 -0
- pipecat/services/cartesia/stt.py +77 -70
- pipecat/services/cartesia/tts.py +80 -13
- pipecat/services/deepgram/__init__.py +1 -0
- pipecat/services/deepgram/flux/__init__.py +0 -0
- pipecat/services/deepgram/flux/stt.py +640 -0
- pipecat/services/elevenlabs/__init__.py +4 -1
- pipecat/services/elevenlabs/stt.py +339 -0
- pipecat/services/elevenlabs/tts.py +87 -46
- pipecat/services/fish/tts.py +5 -2
- pipecat/services/gemini_multimodal_live/events.py +38 -524
- pipecat/services/gemini_multimodal_live/file_api.py +23 -173
- pipecat/services/gemini_multimodal_live/gemini.py +41 -1403
- pipecat/services/gladia/stt.py +56 -72
- pipecat/services/google/__init__.py +1 -0
- pipecat/services/google/gemini_live/__init__.py +3 -0
- pipecat/services/google/gemini_live/file_api.py +189 -0
- pipecat/services/google/gemini_live/llm.py +1582 -0
- pipecat/services/google/gemini_live/llm_vertex.py +184 -0
- pipecat/services/google/llm.py +15 -11
- pipecat/services/google/llm_openai.py +3 -3
- pipecat/services/google/llm_vertex.py +86 -16
- pipecat/services/google/stt.py +4 -0
- pipecat/services/google/tts.py +7 -3
- pipecat/services/heygen/api.py +2 -0
- pipecat/services/heygen/client.py +8 -4
- pipecat/services/heygen/video.py +2 -0
- pipecat/services/hume/__init__.py +5 -0
- pipecat/services/hume/tts.py +220 -0
- pipecat/services/inworld/tts.py +6 -6
- pipecat/services/llm_service.py +15 -5
- pipecat/services/lmnt/tts.py +4 -2
- pipecat/services/mcp_service.py +4 -2
- pipecat/services/mem0/memory.py +6 -5
- pipecat/services/mistral/llm.py +29 -8
- pipecat/services/moondream/vision.py +42 -16
- pipecat/services/neuphonic/tts.py +5 -2
- pipecat/services/openai/__init__.py +1 -0
- pipecat/services/openai/base_llm.py +27 -20
- pipecat/services/openai/realtime/__init__.py +0 -0
- pipecat/services/openai/realtime/context.py +272 -0
- pipecat/services/openai/realtime/events.py +1106 -0
- pipecat/services/openai/realtime/frames.py +37 -0
- pipecat/services/openai/realtime/llm.py +829 -0
- pipecat/services/openai/tts.py +49 -10
- pipecat/services/openai_realtime/__init__.py +27 -0
- pipecat/services/openai_realtime/azure.py +21 -0
- pipecat/services/openai_realtime/context.py +21 -0
- pipecat/services/openai_realtime/events.py +21 -0
- pipecat/services/openai_realtime/frames.py +21 -0
- pipecat/services/openai_realtime_beta/azure.py +16 -0
- pipecat/services/openai_realtime_beta/openai.py +17 -5
- pipecat/services/piper/tts.py +7 -9
- pipecat/services/playht/tts.py +34 -4
- pipecat/services/rime/tts.py +12 -12
- pipecat/services/riva/stt.py +3 -1
- pipecat/services/salesforce/__init__.py +9 -0
- pipecat/services/salesforce/llm.py +700 -0
- pipecat/services/sarvam/__init__.py +7 -0
- pipecat/services/sarvam/stt.py +540 -0
- pipecat/services/sarvam/tts.py +97 -13
- pipecat/services/simli/video.py +2 -2
- pipecat/services/speechmatics/stt.py +22 -10
- pipecat/services/stt_service.py +47 -0
- pipecat/services/tavus/video.py +2 -2
- pipecat/services/tts_service.py +75 -22
- pipecat/services/vision_service.py +7 -6
- pipecat/services/vistaar/llm.py +51 -9
- pipecat/tests/utils.py +4 -4
- pipecat/transcriptions/language.py +41 -1
- pipecat/transports/base_input.py +13 -34
- pipecat/transports/base_output.py +140 -104
- pipecat/transports/daily/transport.py +199 -26
- pipecat/transports/heygen/__init__.py +0 -0
- pipecat/transports/heygen/transport.py +381 -0
- pipecat/transports/livekit/transport.py +228 -63
- pipecat/transports/local/audio.py +6 -1
- pipecat/transports/local/tk.py +11 -2
- pipecat/transports/network/fastapi_websocket.py +1 -1
- pipecat/transports/smallwebrtc/connection.py +103 -19
- pipecat/transports/smallwebrtc/request_handler.py +246 -0
- pipecat/transports/smallwebrtc/transport.py +65 -23
- pipecat/transports/tavus/transport.py +23 -12
- pipecat/transports/websocket/client.py +41 -5
- pipecat/transports/websocket/fastapi.py +21 -11
- pipecat/transports/websocket/server.py +14 -7
- pipecat/transports/whatsapp/api.py +8 -0
- pipecat/transports/whatsapp/client.py +47 -0
- pipecat/utils/base_object.py +54 -22
- pipecat/utils/redis.py +58 -0
- pipecat/utils/string.py +13 -1
- pipecat/utils/tracing/service_decorators.py +21 -21
- pipecat/serializers/genesys.py +0 -95
- pipecat/services/google/test-google-chirp.py +0 -45
- pipecat/services/openai.py +0 -698
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/WHEEL +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/licenses/LICENSE +0 -0
- {dv_pipecat_ai-0.0.82.dev857.dist-info → dv_pipecat_ai-0.0.85.dev837.dist-info}/top_level.txt +0 -0
- /pipecat/services/{aws_nova_sonic → aws/nova_sonic}/ready.wav +0 -0
pipecat/runner/utils.py
CHANGED
|
@@ -99,16 +99,47 @@ async def parse_telephony_websocket(websocket: WebSocket):
|
|
|
99
99
|
tuple: (transport_type: str, call_data: dict)
|
|
100
100
|
|
|
101
101
|
call_data contains provider-specific fields:
|
|
102
|
-
|
|
103
|
-
-
|
|
104
|
-
|
|
105
|
-
|
|
102
|
+
|
|
103
|
+
- Twilio::
|
|
104
|
+
|
|
105
|
+
{
|
|
106
|
+
"stream_id": str,
|
|
107
|
+
"call_id": str,
|
|
108
|
+
"body": dict
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
- Telnyx::
|
|
112
|
+
|
|
113
|
+
{
|
|
114
|
+
"stream_id": str,
|
|
115
|
+
"call_control_id": str,
|
|
116
|
+
"outbound_encoding": str,
|
|
117
|
+
"from": str,
|
|
118
|
+
"to": str,
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
- Plivo::
|
|
122
|
+
|
|
123
|
+
{
|
|
124
|
+
"stream_id": str,
|
|
125
|
+
"call_id": str,
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
- Exotel::
|
|
129
|
+
|
|
130
|
+
{
|
|
131
|
+
"stream_id": str,
|
|
132
|
+
"call_id": str,
|
|
133
|
+
"account_sid": str,
|
|
134
|
+
"from": str,
|
|
135
|
+
"to": str,
|
|
136
|
+
}
|
|
106
137
|
|
|
107
138
|
Example usage::
|
|
108
139
|
|
|
109
140
|
transport_type, call_data = await parse_telephony_websocket(websocket)
|
|
110
|
-
if transport_type == "
|
|
111
|
-
|
|
141
|
+
if transport_type == "twilio":
|
|
142
|
+
user_id = call_data["body"]["user_id"]
|
|
112
143
|
"""
|
|
113
144
|
# Read first two messages
|
|
114
145
|
start_data = websocket.iter_text()
|
|
@@ -151,9 +182,12 @@ async def parse_telephony_websocket(websocket: WebSocket):
|
|
|
151
182
|
# Extract provider-specific data
|
|
152
183
|
if transport_type == "twilio":
|
|
153
184
|
start_data = call_data_raw.get("start", {})
|
|
185
|
+
body_data = start_data.get("customParameters", {})
|
|
154
186
|
call_data = {
|
|
155
187
|
"stream_id": start_data.get("streamSid"),
|
|
156
188
|
"call_id": start_data.get("callSid"),
|
|
189
|
+
# All custom parameters
|
|
190
|
+
"body": body_data,
|
|
157
191
|
}
|
|
158
192
|
|
|
159
193
|
elif transport_type == "telnyx":
|
|
@@ -163,6 +197,8 @@ async def parse_telephony_websocket(websocket: WebSocket):
|
|
|
163
197
|
"outbound_encoding": call_data_raw.get("start", {})
|
|
164
198
|
.get("media_format", {})
|
|
165
199
|
.get("encoding"),
|
|
200
|
+
"from": call_data_raw.get("start", {}).get("from", ""),
|
|
201
|
+
"to": call_data_raw.get("start", {}).get("to", ""),
|
|
166
202
|
}
|
|
167
203
|
|
|
168
204
|
elif transport_type == "plivo":
|
|
@@ -178,6 +214,8 @@ async def parse_telephony_websocket(websocket: WebSocket):
|
|
|
178
214
|
"stream_id": start_data.get("stream_sid"),
|
|
179
215
|
"call_id": start_data.get("call_sid"),
|
|
180
216
|
"account_sid": start_data.get("account_sid"),
|
|
217
|
+
"from": start_data.get("from", ""),
|
|
218
|
+
"to": start_data.get("to", ""),
|
|
181
219
|
}
|
|
182
220
|
|
|
183
221
|
else:
|
|
@@ -275,6 +313,7 @@ def _smallwebrtc_sdp_cleanup_ice_candidates(text: str, pattern: str) -> str:
|
|
|
275
313
|
Returns:
|
|
276
314
|
Cleaned SDP text with filtered ICE candidates.
|
|
277
315
|
"""
|
|
316
|
+
logger.debug("Removing unsupported ICE candidates from SDP")
|
|
278
317
|
result = []
|
|
279
318
|
lines = text.splitlines()
|
|
280
319
|
for line in lines:
|
|
@@ -283,7 +322,7 @@ def _smallwebrtc_sdp_cleanup_ice_candidates(text: str, pattern: str) -> str:
|
|
|
283
322
|
result.append(line)
|
|
284
323
|
else:
|
|
285
324
|
result.append(line)
|
|
286
|
-
return "\r\n".join(result)
|
|
325
|
+
return "\r\n".join(result) + "\r\n"
|
|
287
326
|
|
|
288
327
|
|
|
289
328
|
def _smallwebrtc_sdp_cleanup_fingerprints(text: str) -> str:
|
|
@@ -295,15 +334,16 @@ def _smallwebrtc_sdp_cleanup_fingerprints(text: str) -> str:
|
|
|
295
334
|
Returns:
|
|
296
335
|
SDP text with sha-384 and sha-512 fingerprints removed.
|
|
297
336
|
"""
|
|
337
|
+
logger.debug("Removing unsupported fingerprints from SDP")
|
|
298
338
|
result = []
|
|
299
339
|
lines = text.splitlines()
|
|
300
340
|
for line in lines:
|
|
301
341
|
if not re.search("sha-384", line) and not re.search("sha-512", line):
|
|
302
342
|
result.append(line)
|
|
303
|
-
return "\r\n".join(result)
|
|
343
|
+
return "\r\n".join(result) + "\r\n"
|
|
304
344
|
|
|
305
345
|
|
|
306
|
-
def smallwebrtc_sdp_munging(sdp: str, host: str) -> str:
|
|
346
|
+
def smallwebrtc_sdp_munging(sdp: str, host: Optional[str]) -> str:
|
|
307
347
|
"""Apply SDP modifications for SmallWebRTC compatibility.
|
|
308
348
|
|
|
309
349
|
Args:
|
|
@@ -314,7 +354,8 @@ def smallwebrtc_sdp_munging(sdp: str, host: str) -> str:
|
|
|
314
354
|
Modified SDP string with fingerprint and ICE candidate cleanup.
|
|
315
355
|
"""
|
|
316
356
|
sdp = _smallwebrtc_sdp_cleanup_fingerprints(sdp)
|
|
317
|
-
|
|
357
|
+
if host:
|
|
358
|
+
sdp = _smallwebrtc_sdp_cleanup_ice_candidates(sdp, host)
|
|
318
359
|
return sdp
|
|
319
360
|
|
|
320
361
|
|
pipecat/serializers/__init__.py
CHANGED
|
@@ -1,18 +1,22 @@
|
|
|
1
1
|
from .base_serializer import FrameSerializer, FrameSerializerType
|
|
2
2
|
from .convox import ConVoxFrameSerializer
|
|
3
|
+
from .custom import CustomFrameSerializer
|
|
3
4
|
from .exotel import ExotelFrameSerializer
|
|
4
5
|
from .plivo import PlivoFrameSerializer
|
|
5
6
|
from .telnyx import TelnyxFrameSerializer
|
|
6
7
|
from .twilio import TwilioFrameSerializer
|
|
8
|
+
from .vi import VIFrameSerializer
|
|
7
9
|
|
|
8
10
|
__all__ = [
|
|
9
11
|
"FrameSerializer",
|
|
10
|
-
"FrameSerializerType",
|
|
12
|
+
"FrameSerializerType",
|
|
11
13
|
"ConVoxFrameSerializer",
|
|
14
|
+
"CustomFrameSerializer",
|
|
12
15
|
"ExotelFrameSerializer",
|
|
13
16
|
"PlivoFrameSerializer",
|
|
14
17
|
"TelnyxFrameSerializer",
|
|
15
18
|
"TwilioFrameSerializer",
|
|
19
|
+
"VIFrameSerializer",
|
|
16
20
|
]
|
|
17
21
|
|
|
18
22
|
# Optional imports
|
pipecat/serializers/asterisk.py
CHANGED
|
@@ -1,4 +1,6 @@
|
|
|
1
1
|
# asterisk_ws_serializer.py
|
|
2
|
+
"""Frame serializer for Asterisk WebSocket communication."""
|
|
3
|
+
|
|
2
4
|
import base64
|
|
3
5
|
import json
|
|
4
6
|
from typing import Literal, Optional
|
|
@@ -12,8 +14,8 @@ from pipecat.frames.frames import (
|
|
|
12
14
|
EndFrame,
|
|
13
15
|
Frame,
|
|
14
16
|
InputAudioRawFrame,
|
|
17
|
+
InterruptionFrame,
|
|
15
18
|
StartFrame,
|
|
16
|
-
StartInterruptionFrame,
|
|
17
19
|
TransportMessageFrame,
|
|
18
20
|
TransportMessageUrgentFrame,
|
|
19
21
|
)
|
|
@@ -21,6 +23,8 @@ from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializer
|
|
|
21
23
|
|
|
22
24
|
|
|
23
25
|
class AsteriskFrameSerializer(FrameSerializer):
|
|
26
|
+
"""Serializes Pipecat frames to/from Asterisk WebSocket JSON messages."""
|
|
27
|
+
|
|
24
28
|
class InputParams(BaseModel):
|
|
25
29
|
"""Configuration parameters for AsteriskFrameSerializer.
|
|
26
30
|
|
|
@@ -39,6 +43,12 @@ class AsteriskFrameSerializer(FrameSerializer):
|
|
|
39
43
|
auto_hang_up: bool = False # no-op here; adapter handles hangup
|
|
40
44
|
|
|
41
45
|
def __init__(self, stream_id: str, params: Optional[InputParams] = None):
|
|
46
|
+
"""Initialize the Asterisk frame serializer.
|
|
47
|
+
|
|
48
|
+
Args:
|
|
49
|
+
stream_id: Unique identifier for the media stream.
|
|
50
|
+
params: Configuration parameters for the serializer.
|
|
51
|
+
"""
|
|
42
52
|
self._stream_id = stream_id
|
|
43
53
|
self._params = params or AsteriskFrameSerializer.InputParams()
|
|
44
54
|
self._tel_rate = self._params.telephony_sample_rate
|
|
@@ -49,13 +59,16 @@ class AsteriskFrameSerializer(FrameSerializer):
|
|
|
49
59
|
|
|
50
60
|
@property
|
|
51
61
|
def type(self) -> FrameSerializerType:
|
|
62
|
+
"""Return the serializer type (TEXT for JSON messages)."""
|
|
52
63
|
return FrameSerializerType.TEXT # we send/recv JSON strings
|
|
53
64
|
|
|
54
65
|
async def setup(self, frame: StartFrame):
|
|
66
|
+
"""Setup the serializer with audio parameters from the StartFrame."""
|
|
55
67
|
self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
|
|
56
68
|
|
|
57
69
|
# Pipecat -> Adapter (play to caller)
|
|
58
70
|
async def serialize(self, frame: Frame) -> str | bytes | None:
|
|
71
|
+
"""Serialize Pipecat frames to Asterisk WebSocket JSON messages."""
|
|
59
72
|
# On pipeline end, ask bridge to hang up
|
|
60
73
|
if (
|
|
61
74
|
self._params.auto_hang_up
|
|
@@ -64,7 +77,7 @@ class AsteriskFrameSerializer(FrameSerializer):
|
|
|
64
77
|
):
|
|
65
78
|
self._hangup_sent = True
|
|
66
79
|
return json.dumps({"event": "hangup"})
|
|
67
|
-
if isinstance(frame, StartInterruptionFrame):
|
|
80
|
+
if isinstance(frame, InterruptionFrame):
|
|
68
81
|
return json.dumps({"event": "clear", "streamId": self._stream_id})
|
|
69
82
|
if isinstance(frame, AudioRawFrame):
|
|
70
83
|
pcm = frame.audio
|
|
@@ -114,6 +127,7 @@ class AsteriskFrameSerializer(FrameSerializer):
|
|
|
114
127
|
|
|
115
128
|
# Adapter -> Pipecat (audio from caller)
|
|
116
129
|
async def deserialize(self, data: str | bytes) -> Frame | None:
|
|
130
|
+
"""Deserialize Asterisk WebSocket JSON messages to Pipecat frames."""
|
|
117
131
|
try:
|
|
118
132
|
msg = json.loads(data)
|
|
119
133
|
except Exception:
|
pipecat/serializers/convox.py
CHANGED
|
@@ -4,9 +4,11 @@
|
|
|
4
4
|
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
5
|
#
|
|
6
6
|
|
|
7
|
+
"""ConVox WebSocket frame serializer for audio streaming and call management."""
|
|
8
|
+
|
|
7
9
|
import base64
|
|
8
|
-
import datetime
|
|
9
10
|
import json
|
|
11
|
+
from datetime import datetime, timezone
|
|
10
12
|
from typing import Optional
|
|
11
13
|
|
|
12
14
|
from loguru import logger
|
|
@@ -20,9 +22,9 @@ from pipecat.frames.frames import (
|
|
|
20
22
|
Frame,
|
|
21
23
|
InputAudioRawFrame,
|
|
22
24
|
InputDTMFFrame,
|
|
25
|
+
InterruptionFrame,
|
|
23
26
|
KeypadEntry,
|
|
24
27
|
StartFrame,
|
|
25
|
-
StartInterruptionFrame,
|
|
26
28
|
TransportMessageFrame,
|
|
27
29
|
TransportMessageUrgentFrame,
|
|
28
30
|
)
|
|
@@ -99,6 +101,7 @@ class ConVoxFrameSerializer(FrameSerializer):
|
|
|
99
101
|
"""Serializes a Pipecat frame to ConVox WebSocket format.
|
|
100
102
|
|
|
101
103
|
Handles conversion of various frame types to ConVox WebSocket messages.
|
|
104
|
+
For EndFrames, initiates call termination if auto_hang_up is enabled.
|
|
102
105
|
|
|
103
106
|
Args:
|
|
104
107
|
frame: The Pipecat frame to serialize.
|
|
@@ -106,7 +109,15 @@ class ConVoxFrameSerializer(FrameSerializer):
|
|
|
106
109
|
Returns:
|
|
107
110
|
Serialized data as JSON string, or None if the frame isn't handled.
|
|
108
111
|
"""
|
|
109
|
-
if isinstance(frame, StartInterruptionFrame):
|
|
112
|
+
if (
|
|
113
|
+
self._params.auto_hang_up
|
|
114
|
+
and not self._call_ended
|
|
115
|
+
and isinstance(frame, (EndFrame, CancelFrame))
|
|
116
|
+
):
|
|
117
|
+
self._call_ended = True
|
|
118
|
+
# Return the callEnd event to be sent via the WebSocket
|
|
119
|
+
return await self._send_call_end_event()
|
|
120
|
+
elif isinstance(frame, InterruptionFrame):
|
|
110
121
|
# Clear/interrupt command for ConVox
|
|
111
122
|
message = {
|
|
112
123
|
"event": "clear",
|
|
@@ -138,7 +149,7 @@ class ConVoxFrameSerializer(FrameSerializer):
|
|
|
138
149
|
payload = base64.b64encode(serialized_data).decode("ascii")
|
|
139
150
|
|
|
140
151
|
# ConVox expects play_audio event format according to the documentation
|
|
141
|
-
timestamp = datetime.
|
|
152
|
+
timestamp = datetime.now(timezone.utc).isoformat().replace("+00:00", "Z")
|
|
142
153
|
|
|
143
154
|
message = {
|
|
144
155
|
"event": "play_audio",
|
|
@@ -164,6 +175,32 @@ class ConVoxFrameSerializer(FrameSerializer):
|
|
|
164
175
|
|
|
165
176
|
return None
|
|
166
177
|
|
|
178
|
+
async def _send_call_end_event(self):
|
|
179
|
+
"""Send a callEnd event to ConVox to terminate the call.
|
|
180
|
+
|
|
181
|
+
This method is called when auto_hang_up is enabled and an EndFrame or
|
|
182
|
+
CancelFrame is received, similar to the logic in end_call_handler.py.
|
|
183
|
+
"""
|
|
184
|
+
try:
|
|
185
|
+
call_end_event = {
|
|
186
|
+
"event": "callEnd",
|
|
187
|
+
"details": {
|
|
188
|
+
"timestamp": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"),
|
|
189
|
+
"direction": "WSS",
|
|
190
|
+
"message": "Event trigger request",
|
|
191
|
+
},
|
|
192
|
+
}
|
|
193
|
+
|
|
194
|
+
logger.info(
|
|
195
|
+
f"ConVox auto_hang_up: Sending callEnd event for stream_id: {self._stream_id}, call_id: {self._call_id}"
|
|
196
|
+
)
|
|
197
|
+
# Note: The actual sending will be handled by the transport layer
|
|
198
|
+
# when this method returns the JSON string
|
|
199
|
+
return json.dumps(call_end_event)
|
|
200
|
+
except Exception as e:
|
|
201
|
+
logger.error(f"ConVox auto_hang_up: Failed to create callEnd event: {e}")
|
|
202
|
+
return None
|
|
203
|
+
|
|
167
204
|
async def deserialize(self, data: str | bytes) -> Frame | None:
|
|
168
205
|
"""Deserializes ConVox WebSocket data to Pipecat frames.
|
|
169
206
|
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2024–2025, Daily
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD 2-Clause License
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
"""Custom/External telephony serializer for Pipecat with Ringg AI WebSocket API. Customers will directly connect to Ringg AI WebSocket API."""
|
|
8
|
+
|
|
9
|
+
import base64
|
|
10
|
+
import json
|
|
11
|
+
import uuid
|
|
12
|
+
from typing import Optional
|
|
13
|
+
|
|
14
|
+
from loguru import logger
|
|
15
|
+
from pydantic import BaseModel
|
|
16
|
+
|
|
17
|
+
from pipecat.audio.utils import (
|
|
18
|
+
alaw_to_pcm,
|
|
19
|
+
create_stream_resampler,
|
|
20
|
+
pcm_to_alaw,
|
|
21
|
+
pcm_to_ulaw,
|
|
22
|
+
ulaw_to_pcm,
|
|
23
|
+
)
|
|
24
|
+
from pipecat.frames.frames import (
|
|
25
|
+
AudioRawFrame,
|
|
26
|
+
CallTransferFrame,
|
|
27
|
+
CancelFrame,
|
|
28
|
+
EndFrame,
|
|
29
|
+
Frame,
|
|
30
|
+
InputAudioRawFrame,
|
|
31
|
+
InterruptionFrame,
|
|
32
|
+
StartFrame,
|
|
33
|
+
TransportMessageFrame,
|
|
34
|
+
TransportMessageUrgentFrame,
|
|
35
|
+
)
|
|
36
|
+
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class CustomFrameSerializer(FrameSerializer):
|
|
40
|
+
"""Serializer for Custom/External telephony WebSocket protocol (Ringg AI API).
|
|
41
|
+
|
|
42
|
+
This serializer handles converting between Pipecat frames and the Ringg AI
|
|
43
|
+
WebSocket protocol for external/custom telephony providers. It supports
|
|
44
|
+
PCMU (μ-law), PCMA (A-law), and PCM codecs with automatic conversion.
|
|
45
|
+
|
|
46
|
+
Supported events:
|
|
47
|
+
- start: Initialize call with agent configuration
|
|
48
|
+
- media: Bidirectional audio streaming
|
|
49
|
+
- clear: Clear audio buffers (interruption)
|
|
50
|
+
- call_transfer: Transfer call to another number
|
|
51
|
+
- hang_up: End call notification
|
|
52
|
+
|
|
53
|
+
Audio format:
|
|
54
|
+
- Sample Rate: Configurable (default 8kHz)
|
|
55
|
+
- Channels: Mono (1 channel)
|
|
56
|
+
- Bit Depth: 16-bit
|
|
57
|
+
- Encoding: Little-endian
|
|
58
|
+
- Payload Encoding: Base64
|
|
59
|
+
- Supported Codecs: PCMU (μ-law), PCMA (A-law), PCM (raw)
|
|
60
|
+
"""
|
|
61
|
+
|
|
62
|
+
class InputParams(BaseModel):
|
|
63
|
+
"""Configuration parameters for CustomFrameSerializer.
|
|
64
|
+
|
|
65
|
+
Parameters:
|
|
66
|
+
custom_sample_rate: Sample rate used by external client, defaults to 8000 Hz.
|
|
67
|
+
sample_rate: Optional override for pipeline input sample rate.
|
|
68
|
+
codec: Audio codec - "pcmu" (μ-law), "pcma" (A-law), or "pcm" (raw PCM).
|
|
69
|
+
"""
|
|
70
|
+
|
|
71
|
+
custom_sample_rate: int = 8000
|
|
72
|
+
sample_rate: Optional[int] = None
|
|
73
|
+
codec: str = "pcmu" # "pcmu", "pcma", or "pcm"
|
|
74
|
+
|
|
75
|
+
def __init__(
|
|
76
|
+
self, stream_sid: str, call_sid: Optional[str] = None, params: Optional[InputParams] = None
|
|
77
|
+
):
|
|
78
|
+
"""Initialize the CustomFrameSerializer.
|
|
79
|
+
|
|
80
|
+
Args:
|
|
81
|
+
stream_sid: The stream identifier from external client.
|
|
82
|
+
call_sid: The call identifier from external client.
|
|
83
|
+
params: Configuration parameters.
|
|
84
|
+
"""
|
|
85
|
+
self._stream_sid = stream_sid
|
|
86
|
+
self._call_sid = call_sid
|
|
87
|
+
self._params = params or CustomFrameSerializer.InputParams()
|
|
88
|
+
|
|
89
|
+
self._custom_sample_rate = self._params.custom_sample_rate
|
|
90
|
+
self._sample_rate = 0 # Pipeline input rate
|
|
91
|
+
self._codec = self._params.codec.lower()
|
|
92
|
+
|
|
93
|
+
self._input_resampler = create_stream_resampler()
|
|
94
|
+
self._output_resampler = create_stream_resampler()
|
|
95
|
+
|
|
96
|
+
@property
|
|
97
|
+
def type(self) -> FrameSerializerType:
|
|
98
|
+
"""Gets the serializer type.
|
|
99
|
+
|
|
100
|
+
Returns:
|
|
101
|
+
The serializer type, TEXT for JSON-based protocol.
|
|
102
|
+
"""
|
|
103
|
+
return FrameSerializerType.TEXT
|
|
104
|
+
|
|
105
|
+
async def setup(self, frame: StartFrame):
|
|
106
|
+
"""Sets up the serializer with pipeline configuration.
|
|
107
|
+
|
|
108
|
+
Args:
|
|
109
|
+
frame: The StartFrame containing pipeline configuration.
|
|
110
|
+
"""
|
|
111
|
+
self._sample_rate = self._params.sample_rate or frame.audio_in_sample_rate
|
|
112
|
+
|
|
113
|
+
async def serialize(self, frame: Frame) -> str | bytes | None:
|
|
114
|
+
"""Serializes a Pipecat frame to Custom telephony WebSocket format.
|
|
115
|
+
|
|
116
|
+
Handles conversion of various frame types to Ringg AI WebSocket messages.
|
|
117
|
+
|
|
118
|
+
Args:
|
|
119
|
+
frame: The Pipecat frame to serialize.
|
|
120
|
+
|
|
121
|
+
Returns:
|
|
122
|
+
Serialized data as JSON string, or None if the frame isn't handled.
|
|
123
|
+
"""
|
|
124
|
+
if isinstance(frame, InterruptionFrame):
|
|
125
|
+
# Send clear event to instruct client to discard buffered audio
|
|
126
|
+
answer = {"event": "clear", "stream_sid": self._stream_sid}
|
|
127
|
+
return json.dumps(answer)
|
|
128
|
+
|
|
129
|
+
elif isinstance(frame, CallTransferFrame):
|
|
130
|
+
# Send call_transfer event to transfer the call to another number
|
|
131
|
+
answer = {
|
|
132
|
+
"event": "call_transfer",
|
|
133
|
+
"call_sid": self._call_sid or self._stream_sid,
|
|
134
|
+
"to": frame.target,
|
|
135
|
+
}
|
|
136
|
+
return json.dumps(answer)
|
|
137
|
+
|
|
138
|
+
elif isinstance(frame, (EndFrame, CancelFrame)):
|
|
139
|
+
# Send hang_up event to end the call
|
|
140
|
+
answer = {"event": "hang_up", "stream_sid": self._stream_sid}
|
|
141
|
+
return json.dumps(answer)
|
|
142
|
+
|
|
143
|
+
elif isinstance(frame, AudioRawFrame):
|
|
144
|
+
data = frame.audio
|
|
145
|
+
|
|
146
|
+
# Convert audio based on codec
|
|
147
|
+
if self._codec == "pcmu":
|
|
148
|
+
# Convert PCM to μ-law for PCMU codec
|
|
149
|
+
serialized_data = await pcm_to_ulaw(
|
|
150
|
+
data, frame.sample_rate, self._custom_sample_rate, self._output_resampler
|
|
151
|
+
)
|
|
152
|
+
elif self._codec == "pcma":
|
|
153
|
+
# Convert PCM to A-law for PCMA codec
|
|
154
|
+
serialized_data = await pcm_to_alaw(
|
|
155
|
+
data, frame.sample_rate, self._custom_sample_rate, self._output_resampler
|
|
156
|
+
)
|
|
157
|
+
else: # pcm
|
|
158
|
+
# Resample PCM to target sample rate
|
|
159
|
+
serialized_data = await self._output_resampler.resample(
|
|
160
|
+
data, frame.sample_rate, self._custom_sample_rate
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
if serialized_data is None or len(serialized_data) == 0:
|
|
164
|
+
# Skip if no audio data
|
|
165
|
+
return None
|
|
166
|
+
|
|
167
|
+
payload = base64.b64encode(serialized_data).decode("ascii")
|
|
168
|
+
answer = {
|
|
169
|
+
"event": "media",
|
|
170
|
+
"stream_sid": self._stream_sid,
|
|
171
|
+
"media": {"payload": payload},
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
return json.dumps(answer)
|
|
175
|
+
|
|
176
|
+
elif isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
|
|
177
|
+
return json.dumps(frame.message)
|
|
178
|
+
|
|
179
|
+
return None
|
|
180
|
+
|
|
181
|
+
async def deserialize(self, data: str | bytes) -> Frame | None:
|
|
182
|
+
"""Deserializes Custom telephony WebSocket data to Pipecat frames.
|
|
183
|
+
|
|
184
|
+
Handles conversion of Ringg AI WebSocket events to appropriate Pipecat frames.
|
|
185
|
+
|
|
186
|
+
Args:
|
|
187
|
+
data: The raw WebSocket data from external client.
|
|
188
|
+
|
|
189
|
+
Returns:
|
|
190
|
+
A Pipecat frame corresponding to the event, or None if unhandled.
|
|
191
|
+
"""
|
|
192
|
+
try:
|
|
193
|
+
message = json.loads(data)
|
|
194
|
+
except json.JSONDecodeError as e:
|
|
195
|
+
logger.error(f"Failed to parse JSON message: {e}")
|
|
196
|
+
return None
|
|
197
|
+
|
|
198
|
+
event = message.get("event")
|
|
199
|
+
|
|
200
|
+
if event == "media":
|
|
201
|
+
media = message.get("media", {})
|
|
202
|
+
payload_base64 = media.get("payload")
|
|
203
|
+
uuid = message.get("uuid")
|
|
204
|
+
|
|
205
|
+
if not payload_base64:
|
|
206
|
+
logger.warning("Media event missing payload")
|
|
207
|
+
return None
|
|
208
|
+
|
|
209
|
+
try:
|
|
210
|
+
payload = base64.b64decode(payload_base64)
|
|
211
|
+
except Exception as e:
|
|
212
|
+
logger.error(f"Failed to decode base64 payload: {e}")
|
|
213
|
+
return None
|
|
214
|
+
|
|
215
|
+
# Convert audio based on codec
|
|
216
|
+
if self._codec == "pcmu":
|
|
217
|
+
# Convert μ-law to PCM
|
|
218
|
+
deserialized_data = await ulaw_to_pcm(
|
|
219
|
+
payload, self._custom_sample_rate, self._sample_rate, self._input_resampler
|
|
220
|
+
)
|
|
221
|
+
elif self._codec == "pcma":
|
|
222
|
+
# Convert A-law to PCM
|
|
223
|
+
deserialized_data = await alaw_to_pcm(
|
|
224
|
+
payload, self._custom_sample_rate, self._sample_rate, self._input_resampler
|
|
225
|
+
)
|
|
226
|
+
else: # pcm
|
|
227
|
+
# Resample PCM to pipeline sample rate
|
|
228
|
+
deserialized_data = await self._input_resampler.resample(
|
|
229
|
+
payload,
|
|
230
|
+
self._custom_sample_rate,
|
|
231
|
+
self._sample_rate,
|
|
232
|
+
)
|
|
233
|
+
|
|
234
|
+
if deserialized_data is None or len(deserialized_data) == 0:
|
|
235
|
+
# Skip if no audio data
|
|
236
|
+
return None
|
|
237
|
+
|
|
238
|
+
audio_frame = InputAudioRawFrame(
|
|
239
|
+
audio=deserialized_data,
|
|
240
|
+
num_channels=1, # Mono audio
|
|
241
|
+
sample_rate=self._sample_rate,
|
|
242
|
+
)
|
|
243
|
+
return audio_frame
|
|
244
|
+
|
|
245
|
+
elif event == "start":
|
|
246
|
+
# Log start event but don't generate a frame (handled by WebSocketService)
|
|
247
|
+
logger.debug(f"Received start event for stream {self._stream_sid}")
|
|
248
|
+
return None
|
|
249
|
+
|
|
250
|
+
elif event == "clear":
|
|
251
|
+
# External client requesting to clear our audio buffers
|
|
252
|
+
logger.debug(f"Received clear event for stream {self._stream_sid}")
|
|
253
|
+
return None
|
|
254
|
+
|
|
255
|
+
else:
|
|
256
|
+
logger.debug(f"Unhandled event type: {event} for stream {self._stream_sid}")
|
|
257
|
+
return None
|
pipecat/serializers/exotel.py
CHANGED
|
@@ -20,10 +20,10 @@ from pipecat.frames.frames import (
|
|
|
20
20
|
Frame,
|
|
21
21
|
InputAudioRawFrame,
|
|
22
22
|
InputDTMFFrame,
|
|
23
|
+
InterruptionFrame,
|
|
24
|
+
OutputTransportMessageFrame,
|
|
25
|
+
OutputTransportMessageUrgentFrame,
|
|
23
26
|
StartFrame,
|
|
24
|
-
StartInterruptionFrame,
|
|
25
|
-
TransportMessageFrame,
|
|
26
|
-
TransportMessageUrgentFrame,
|
|
27
27
|
)
|
|
28
28
|
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
|
|
29
29
|
|
|
@@ -98,7 +98,7 @@ class ExotelFrameSerializer(FrameSerializer):
|
|
|
98
98
|
Returns:
|
|
99
99
|
Serialized data as string or bytes, or None if the frame isn't handled.
|
|
100
100
|
"""
|
|
101
|
-
if isinstance(frame, StartInterruptionFrame):
|
|
101
|
+
if isinstance(frame, InterruptionFrame):
|
|
102
102
|
answer = {"event": "clear", "streamSid": self._stream_sid}
|
|
103
103
|
return json.dumps(answer)
|
|
104
104
|
elif isinstance(frame, AudioRawFrame):
|
|
@@ -121,7 +121,7 @@ class ExotelFrameSerializer(FrameSerializer):
|
|
|
121
121
|
}
|
|
122
122
|
|
|
123
123
|
return json.dumps(answer)
|
|
124
|
-
elif isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
|
|
124
|
+
elif isinstance(frame, (OutputTransportMessageFrame, OutputTransportMessageUrgentFrame)):
|
|
125
125
|
return json.dumps(frame.message)
|
|
126
126
|
|
|
127
127
|
return None
|
pipecat/serializers/livekit.py
CHANGED
|
@@ -25,11 +25,31 @@ except ModuleNotFoundError as e:
|
|
|
25
25
|
class LivekitFrameSerializer(FrameSerializer):
|
|
26
26
|
"""Serializer for converting between Pipecat frames and LiveKit audio frames.
|
|
27
27
|
|
|
28
|
+
.. deprecated:: 0.0.90
|
|
29
|
+
|
|
30
|
+
This class is deprecated and will be removed in a future version.
|
|
31
|
+
Please use LiveKitTransport instead, which handles audio streaming
|
|
32
|
+
and frame conversion natively.
|
|
33
|
+
|
|
28
34
|
This serializer handles the conversion of Pipecat's OutputAudioRawFrame objects
|
|
29
35
|
to LiveKit AudioFrame objects for transmission, and the reverse conversion
|
|
30
36
|
for received audio data.
|
|
31
37
|
"""
|
|
32
38
|
|
|
39
|
+
def __init__(self):
|
|
40
|
+
"""Initialize the LiveKit frame serializer."""
|
|
41
|
+
super().__init__()
|
|
42
|
+
import warnings
|
|
43
|
+
|
|
44
|
+
with warnings.catch_warnings():
|
|
45
|
+
warnings.simplefilter("always")
|
|
46
|
+
warnings.warn(
|
|
47
|
+
"LivekitFrameSerializer is deprecated and will be removed in a future version. "
|
|
48
|
+
"Please use LiveKitTransport instead, which handles audio streaming natively.",
|
|
49
|
+
DeprecationWarning,
|
|
50
|
+
stacklevel=2,
|
|
51
|
+
)
|
|
52
|
+
|
|
33
53
|
@property
|
|
34
54
|
def type(self) -> FrameSerializerType:
|
|
35
55
|
"""Get the serializer type.
|
pipecat/serializers/plivo.py
CHANGED
|
@@ -22,10 +22,10 @@ from pipecat.frames.frames import (
|
|
|
22
22
|
Frame,
|
|
23
23
|
InputAudioRawFrame,
|
|
24
24
|
InputDTMFFrame,
|
|
25
|
+
InterruptionFrame,
|
|
26
|
+
OutputTransportMessageFrame,
|
|
27
|
+
OutputTransportMessageUrgentFrame,
|
|
25
28
|
StartFrame,
|
|
26
|
-
StartInterruptionFrame,
|
|
27
|
-
TransportMessageFrame,
|
|
28
|
-
TransportMessageUrgentFrame,
|
|
29
29
|
)
|
|
30
30
|
from pipecat.serializers.base_serializer import FrameSerializer, FrameSerializerType
|
|
31
31
|
|
|
@@ -122,7 +122,7 @@ class PlivoFrameSerializer(FrameSerializer):
|
|
|
122
122
|
self._hangup_attempted = True
|
|
123
123
|
await self._hang_up_call()
|
|
124
124
|
return None
|
|
125
|
-
elif isinstance(frame, StartInterruptionFrame):
|
|
125
|
+
elif isinstance(frame, InterruptionFrame):
|
|
126
126
|
answer = {"event": "clearAudio", "streamId": self._stream_id}
|
|
127
127
|
return json.dumps(answer)
|
|
128
128
|
elif isinstance(frame, AudioRawFrame):
|
|
@@ -148,7 +148,7 @@ class PlivoFrameSerializer(FrameSerializer):
|
|
|
148
148
|
}
|
|
149
149
|
|
|
150
150
|
return json.dumps(answer)
|
|
151
|
-
elif isinstance(frame, (TransportMessageFrame, TransportMessageUrgentFrame)):
|
|
151
|
+
elif isinstance(frame, (OutputTransportMessageFrame, OutputTransportMessageUrgentFrame)):
|
|
152
152
|
return json.dumps(frame.message)
|
|
153
153
|
|
|
154
154
|
# Return None for unhandled frames
|