pipecat-asterisk 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pipecat_asterisk/__init__.py +10 -0
- pipecat_asterisk/py.typed +0 -0
- pipecat_asterisk/serializer/__init__.py +10 -0
- pipecat_asterisk/serializer/protocol.py +80 -0
- pipecat_asterisk/serializer/serializer.py +468 -0
- pipecat_asterisk/transport/__init__.py +10 -0
- pipecat_asterisk/transport/flow_controller.py +172 -0
- pipecat_asterisk/transport/transport.py +192 -0
- pipecat_asterisk-0.1.0.dist-info/METADATA +128 -0
- pipecat_asterisk-0.1.0.dist-info/RECORD +11 -0
- pipecat_asterisk-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2026, Nikolai Shakin
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
from .serializer.serializer import AsteriskFrameSerializer
|
|
8
|
+
from .transport.transport import AsteriskWebsocketTransport
|
|
9
|
+
|
|
10
|
+
__all__ = ["AsteriskFrameSerializer", "AsteriskWebsocketTransport"]
|
|
File without changes
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2026, Nikolai Shakin
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
from loguru import logger
|
|
8
|
+
import json
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class AsteriskWSProtocol:
|
|
12
|
+
"""Asterisk WebSocket protocol handler.
|
|
13
|
+
|
|
14
|
+
The protocol learns Asterisk web_socket channel subprotocol (json or plain-text) based on the first event "MEDIA_START", if it is not set explicitly.
|
|
15
|
+
It parses events accordingly to the identified subprotocol and returns dictionary objects.
|
|
16
|
+
It builds commands to Asterisk in the identified subprotocol.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
def __init__(self, subprotocol: str | None = None):
|
|
20
|
+
self._sub_protocol = subprotocol
|
|
21
|
+
if self._sub_protocol is not None and self._sub_protocol not in [
|
|
22
|
+
"json",
|
|
23
|
+
"plain-text",
|
|
24
|
+
]:
|
|
25
|
+
raise ValueError(
|
|
26
|
+
f"Invalid subprotocol for AsteriskWSProtocol: {self._sub_protocol}, it should be either 'json' or 'plain-text' or None for autodetect."
|
|
27
|
+
)
|
|
28
|
+
if self._sub_protocol is None:
|
|
29
|
+
logger.debug(
|
|
30
|
+
"Asterisk subprotocol ['json' or 'plain-text'] is not defined, we will try to learn it automatically based on 'MEDIA_START' event."
|
|
31
|
+
)
|
|
32
|
+
|
|
33
|
+
def parse(self, event: str) -> dict | None:
|
|
34
|
+
"""Parses the event from Asterisk WebSocket channel and return a dictionary object."""
|
|
35
|
+
if self._sub_protocol is None:
|
|
36
|
+
# If subprotocol is not set, we attempt to identify the format based on the first event we receive, which should be "MEDIA_START"
|
|
37
|
+
if "MEDIA_START" in event:
|
|
38
|
+
try:
|
|
39
|
+
json.loads(event)
|
|
40
|
+
self._sub_protocol = "json"
|
|
41
|
+
logger.debug(
|
|
42
|
+
"Identified Asterisk subprotocol as JSON based on MEDIA_START event format."
|
|
43
|
+
)
|
|
44
|
+
except json.JSONDecodeError:
|
|
45
|
+
self._sub_protocol = "plain-text"
|
|
46
|
+
logger.debug(
|
|
47
|
+
"Identified Asterisk subprotocol as plain-text based on MEDIA_START event format. Notice: In plain-text format you will not be able to read Asterisk channel variables."
|
|
48
|
+
)
|
|
49
|
+
else:
|
|
50
|
+
logger.warning(
|
|
51
|
+
f'We tried to auto-detect the Asterisk subprotocol but, received the event before "MEDIA_START". Event: {event}'
|
|
52
|
+
)
|
|
53
|
+
return None
|
|
54
|
+
|
|
55
|
+
if self._sub_protocol == "json":
|
|
56
|
+
try:
|
|
57
|
+
return json.loads(event)
|
|
58
|
+
except json.JSONDecodeError as e:
|
|
59
|
+
logger.error(
|
|
60
|
+
f"Failed to parse Asterisk WebSocket event as JSON: {event}"
|
|
61
|
+
)
|
|
62
|
+
raise e
|
|
63
|
+
else:
|
|
64
|
+
event_entries = event.split(" ")
|
|
65
|
+
if event_entries[0] == "":
|
|
66
|
+
logger.warning(f"Received empty event from Asterisk WebSocket channel.")
|
|
67
|
+
return None
|
|
68
|
+
event_dict = {"event": event_entries.pop(0)}
|
|
69
|
+
for entry in event_entries:
|
|
70
|
+
if ":" in entry:
|
|
71
|
+
key, value = entry.split(":", 1)
|
|
72
|
+
event_dict[key] = value
|
|
73
|
+
return event_dict
|
|
74
|
+
|
|
75
|
+
def build(self, command: str) -> str:
|
|
76
|
+
"""Returns a properly formatted command for Asterisk WebSocket channel based on the identified subprotocol."""
|
|
77
|
+
if self._sub_protocol == "plain-text":
|
|
78
|
+
return command
|
|
79
|
+
else:
|
|
80
|
+
return f'{{"command": "{command}"}}'
|
|
@@ -0,0 +1,468 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2026, Nikolai Shakin
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
import inspect
|
|
8
|
+
from typing import Awaitable, Callable, Optional, cast
|
|
9
|
+
from loguru import logger
|
|
10
|
+
from pipecat.audio.dtmf.types import KeypadEntry
|
|
11
|
+
from pipecat.audio.utils import create_stream_resampler
|
|
12
|
+
from pipecat.frames.frames import (
|
|
13
|
+
CancelFrame,
|
|
14
|
+
EndFrame,
|
|
15
|
+
Frame,
|
|
16
|
+
InputAudioRawFrame,
|
|
17
|
+
InputDTMFFrame,
|
|
18
|
+
InputTransportMessageFrame,
|
|
19
|
+
InterruptionFrame,
|
|
20
|
+
OutputAudioRawFrame,
|
|
21
|
+
StartFrame,
|
|
22
|
+
)
|
|
23
|
+
from pipecat.serializers.base_serializer import FrameSerializer
|
|
24
|
+
|
|
25
|
+
from .protocol import AsteriskWSProtocol
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class AsteriskCommandFrame(Frame):
    """A frame representing a command to be sent to Asterisk WebSocket channel.

    Attributes are set in ``__init__``:
        cmd: The raw command text (e.g. ``"HANGUP"``); the serializer formats
            it for the active subprotocol before sending.
    """

    def __init__(self, cmd: str):
        """Create the frame.

        Args:
            cmd: The command to send to Asterisk.
        """
        # Run the base Frame initialisation first so the frame carries the
        # same bookkeeping attributes as every other frame in the pipeline.
        # NOTE(review): assumes Frame's initialiser takes no required
        # arguments - confirm against the installed pipecat version.
        super().__init__()
        self.cmd = cmd
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class AsteriskFrameSerializer(FrameSerializer):
    """Serializer for the Asterisk WebSocket channel.

    Converts between Pipecat frames and Asterisk's WebSocket channel
    events/commands and binary audio.

    Asterisk to Pipecat:
    - DTMF_END produces an InputDTMFFrame.
    - MEDIA_START produces an InputTransportMessageFrame carrying the event,
      so the pipeline can use its fields (e.g. channel variables) and knows
      when media starts flowing.
    - MEDIA_XOFF is logged as an error: Asterisk asks us to pause sending
      media, which should not happen when the transport's flow control works.
    - MEDIA_XON is logged for information only.
    - QUEUE_DRAINED produces an InputTransportMessageFrame carrying the event,
      so the pipeline can know when Asterisk finished playing queued media.
    - Binary audio becomes an InputAudioRawFrame, resampled if needed.

    Pipecat to Asterisk:
    - EndFrame and CancelFrame produce HANGUP.
    - InterruptionFrame produces FLUSH_MEDIA.
    - OutputAudioRawFrame produces raw audio bytes, resampled if needed.
    - AsteriskCommandFrame produces the arbitrary command it carries.

    Handlers are discovered by name. To handle another Asterisk event add a
    method named ``_ev_{event_name.lower()}``; to handle another frame type add
    a method named ``_frame_{frame_class_name.lower()}``. Frame handlers are
    looked up along the frame's class hierarchy, so a handler for a base frame
    class also serves its subclasses unless a more specific handler exists.
    """

    # Asterisk slin sample rates supported by default, you can check it on your Asterisk with CLI>'core show codecs audio'
    SUPPORTED_SAMPLE_RATES = [12, 16, 24, 32, 44, 48, 96, 192, 128]  # kHz

    def __init__(self, sample_rate: int = 0):
        """Initialize the Asterisk WebSocket Serializer.

        Args:
            sample_rate: Sample rate used by Asterisk. Documented in kHz
                (e.g. ``16``); a value in Hz (e.g. ``16000``) is accepted too.
                Defaults to 0, meaning it is taken from the MEDIA_START event.
        """
        super().__init__()

        self._asterisk_ws_proto = AsteriskWSProtocol()
        self._input_resampler = None  # Will be initialized if resampling is needed
        self._output_resampler = None  # Will be initialized if resampling is needed
        # Rate to send to the pipeline (STT-like processors). Populated in setup().
        self._pipeline_in_sample_rate = 0
        # Rate expected from TTS-like processors. Populated in setup().
        self._pipeline_out_sample_rate = 0

        # Internally the Asterisk rate is always kept in Hz because it is
        # compared against the pipeline rates. A value below 1000 cannot be a
        # rate in Hz, so it is the documented kHz form and gets converted.
        rate = int(sample_rate)
        if 0 < rate < 1000:
            rate = self._khz_to_hz(rate)
        # 0 means "unknown yet": filled in from the MEDIA_START event.
        self._asterisk_sample_rate = rate

    @staticmethod
    def _khz_to_hz(rate_khz: int) -> int:
        """Convert an Asterisk slin rate suffix (kHz) to Hz."""
        # Asterisk's "slin44" is CD-rate audio, i.e. 44100 Hz rather than 44000 Hz.
        return 44100 if rate_khz == 44 else rate_khz * 1000

    def _handle_event(self, message: dict) -> Frame | None:
        """Dispatch an Asterisk event to its ``_ev_*`` handler.

        Handler methods are named ``_ev_{event_name.lower()}``, take the event
        dictionary and return a Frame or None.

        Args:
            message: The event message as a dictionary.

        Returns:
            Whatever the handler returns, or None when the message has no
            ``event`` field or no handler exists for it.
        """
        message_type = message.get("event", None)
        if message_type is None:
            logger.warning(
                f"Received Asterisk WebSocket message without 'event' field: {message}"
            )
            return None

        # str() guards against a non-string "event" value in a JSON message.
        handler = getattr(self, f"_ev_{str(message_type).lower()}", None)
        if not callable(handler):
            logger.info(f"Received unhandled Asterisk WebSocket event: {message}")
            return None

        typed_handler = cast(Callable[[dict], Frame | None], handler)
        return typed_handler(message)

    ### Asterisk Event handlers ###

    def _ev_media_start(self, message: dict) -> Frame | None:
        """MEDIA_START event handler.

        MEDIA_START is the first event received from Asterisk. Fields Asterisk
        may provide in it:
            connection_id: A UUID that will be set on the MEDIA_WEBSOCKET_CONNECTION_ID channel variable.
            channel: The channel name on Asterisk.
            channel_id: The channel's unique id on Asterisk.
            format: The audio format set on the channel.
            optimal_frame_size: The optimal frame size from Asterisk's perspective.
            ptime: The packet size in milliseconds.
            channel_variables: An object containing the variables currently set on the channel.
        ``channel_variables`` is handy for moving data from the dialplan to
        Pipecat, but it is only available in the JSON subprotocol.

        The handler validates the audio format, learns the Asterisk sample
        rate when it was not configured, creates resamplers where the Asterisk
        rate differs from the pipeline rates, and forwards the event to the
        pipeline.

        Args:
            message: The dictionary representation of the MEDIA_START event.

        Returns:
            An InputTransportMessageFrame whose message is the event.

        Raises:
            ValueError: If the format is not slin, or its rate suffix is not a
                supported number.
        """
        # Only signed linear audio is accepted. Transcoding is left to
        # Asterisk on purpose: Pipecat audio frames are slin anyway, and
        # Asterisk transcodes far more efficiently.
        # "or ''" also covers an explicit JSON null in the event.
        audio_format = str(message.get("format") or "").strip().lower()
        if not audio_format.startswith("slin"):
            raise ValueError(
                f"Unsupported audio format in Asterisk MEDIA_START event: [{message.get('format')}], we only support slin format for now. Please use make sure that Asterisk channel is configured to use slin[12..192]."
            )

        # Learn the sample rate from the format name unless it was configured.
        if self._asterisk_sample_rate == 0:
            rate_suffix = audio_format[len("slin"):]
            if not rate_suffix:
                # Plain "slin" is the 8 kHz flavour.
                rate_khz = 8
            elif not rate_suffix.isdigit():
                raise ValueError(
                    f"Invalid sample rate in Asterisk MEDIA_START event: [{rate_suffix}] kHz. Sample rate should be a number in kHz, e.g., 'slin16' for 16000 Hz sample rate."
                )
            else:
                rate_khz = int(rate_suffix)
                if rate_khz not in self.SUPPORTED_SAMPLE_RATES:
                    raise ValueError(
                        f"Unsupported sample rate in Asterisk MEDIA_START event: [{rate_khz}] kHz. Supported sample rates for slin format are {self.SUPPORTED_SAMPLE_RATES} kHz."
                    )

            self._asterisk_sample_rate = self._khz_to_hz(rate_khz)

        logger.info(f"Received MEDIA_START event from Asterisk: {message}")

        # Check if input resampling is needed
        if self._pipeline_in_sample_rate != self._asterisk_sample_rate:
            logger.warning(
                f"Asterisk sample rate: ({self._asterisk_sample_rate} Hz) != pipeline input sample rate ({self._pipeline_in_sample_rate} Hz). Please, try to avoid resampling when possible."
            )
            self._input_resampler = self.create_resampler("input")

        # Check if output resampling is needed
        if self._pipeline_out_sample_rate != self._asterisk_sample_rate:
            logger.warning(
                f"Asterisk sample rate: ({self._asterisk_sample_rate} Hz) != pipeline output sample rate ({self._pipeline_out_sample_rate} Hz). Please, try to avoid resampling when possible."
            )
            self._output_resampler = self.create_resampler("output")

        return InputTransportMessageFrame(message=message)

    def _ev_media_xoff(self, message: dict) -> Frame | None:
        """MEDIA_XOFF event handler.

        Asterisk's websocket channel driver sends this event when its frame
        queue reaches the high water (XOFF) level; media sent afterwards is
        likely to be dropped. The event is not used for flow control here, but
        receiving it means the transport's flow control failed to keep the
        remote buffer under the high water mark, so it is logged as an error.

        Args:
            message: The dictionary representation of the MEDIA_XOFF event.

        Returns:
            Always None.
        """
        logger.error(
            f"Received MEDIA_XOFF event from Asterisk: {message}. Oops, we hit the high water mark, probably Asterisk will drop the following audio frames."
        )
        return None

    def _ev_media_xon(self, message: dict) -> Frame | None:
        """MEDIA_XON event handler.

        Asterisk's websocket channel driver sends this event when its frame
        queue drops below the low water (XON) level and sending may resume.
        Our transport implements its own flow control and does not rely on
        this event, so it is only logged.

        Args:
            message: The dictionary representation of the MEDIA_XON event.

        Returns:
            Always None.
        """
        logger.debug(
            f"Received MEDIA_XON event from Asterisk: {message}. Asterisk audio buffer is ready to receive audio again."
        )
        return None

    def _ev_dtmf_end(self, message: dict) -> Frame | None:
        """DTMF_END event handler.

        Converts a DTMF_END event from Asterisk into an InputDTMFFrame.

        Args:
            message: The dictionary representation of the DTMF_END event.

        Returns:
            An InputDTMFFrame if a valid DTMF digit is found, otherwise None.
        """
        digit = message.get("digit")
        if not digit:
            return None

        try:
            return InputDTMFFrame(KeypadEntry(digit))
        except ValueError:
            # The digit does not match any KeypadEntry value.
            logger.warning(f"Invalid DTMF digit received: {digit}")
            return None

    def _ev_queue_drained(self, message: dict) -> Frame | None:
        """QUEUE_DRAINED event handler.

        QUEUE_DRAINED indicates that Asterisk has processed all the queued
        media, i.e. it stopped playing audio to the channel (the bot stopped
        speaking). Asterisk sends it once per "REPORT_QUEUE_DRAINED" request.
        The transport does not send "REPORT_QUEUE_DRAINED" yet (April 2026),
        so this event is unlikely to be received.

        Args:
            message: The dictionary representation of the QUEUE_DRAINED event.

        Returns:
            An InputTransportMessageFrame whose message is the event.
        """
        # TODO: add REPORT_QUEUE_DRAINED support in the transport
        logger.debug(
            f"Received QUEUE_DRAINED event from Asterisk: {message}. Asterisk has processed all the queued media."
        )
        return InputTransportMessageFrame(message=message)

    #### Pipecat Frame handlers ####

    async def _frame_outputaudiorawframe(
        self, frame: OutputAudioRawFrame
    ) -> Optional[bytes]:
        """OutputAudioRawFrame handler.

        Extracts the raw audio bytes from the frame and resamples them to the
        Asterisk sample rate when that differs from the pipeline output rate.

        Args:
            frame: The OutputAudioRawFrame to be processed.

        Returns:
            The audio bytes to send to Asterisk, or None when there is nothing
            to send (empty audio, non-mono audio, missing resampler, or the
            resampler produced no data).
        """
        data = frame.audio

        if not data:
            logger.debug("OutputAudioRawFrame contains no audio data to serialize.")
            return None

        # The resampler is configured for the pipeline output rate, so a frame
        # at another rate cannot be converted correctly; warn but carry on.
        if self._pipeline_out_sample_rate != frame.sample_rate:
            logger.warning(
                f"OutputAudioRawFrame sample rate ({frame.sample_rate} Hz) != pipeline output sample rate ({self._pipeline_out_sample_rate} Hz). We can't resample the audio frame properly."
            )

        if frame.num_channels != 1:
            logger.warning(
                f"OutputAudioRawFrame has {frame.num_channels} channels, but Asterisk WebSocket channel only supports mono audio. We can't send this audio frame to Asterisk."
            )
            return None

        if self._asterisk_sample_rate == self._pipeline_out_sample_rate:
            logger.trace("Forwarding audio frame without resampling.")
            return data

        if self._output_resampler is None:
            logger.warning(
                "Resampling is required but output resampler is not initialized, we can't resample the audio."
            )
            return None

        logger.trace(
            f"Resampling audio from {self._pipeline_out_sample_rate} Hz to Asterisk sample rate {self._asterisk_sample_rate} Hz before sending to Asterisk."
        )
        resampled_audio = await self._output_resampler(data)
        if not resampled_audio:
            logger.trace("Resampled audio contains no data.")
            return None
        return resampled_audio

    def _frame_asteriskcommandframe(self, frame: AsteriskCommandFrame) -> str:
        """AsteriskCommandFrame handler.

        Args:
            frame: The AsteriskCommandFrame to be processed.

        Returns:
            The frame's command formatted for the active subprotocol.
        """
        return self._asterisk_ws_proto.build(frame.cmd)

    def _frame_endframe(self, frame: EndFrame) -> str:
        """EndFrame handler: terminate the call on Asterisk.

        Args:
            frame: The EndFrame to be processed.

        Returns:
            The HANGUP command formatted for the active subprotocol.
        """
        return self._asterisk_ws_proto.build("HANGUP")

    def _frame_cancelframe(self, frame: CancelFrame) -> str:
        """CancelFrame handler: terminate the call on Asterisk.

        Args:
            frame: The CancelFrame to be processed.

        Returns:
            The HANGUP command formatted for the active subprotocol.
        """
        return self._asterisk_ws_proto.build("HANGUP")

    def _frame_interruptionframe(self, frame: InterruptionFrame) -> str:
        """InterruptionFrame handler: flush the media buffered on Asterisk.

        Flushing makes the bot stop speaking immediately.

        Args:
            frame: The InterruptionFrame to be processed.

        Returns:
            The FLUSH_MEDIA command formatted for the active subprotocol.
        """
        return self._asterisk_ws_proto.build("FLUSH_MEDIA")

    ### Utility methods ###

    def create_resampler(self, direction: str) -> Callable[[bytes], Awaitable[bytes]]:
        """Create a resampler function for one audio direction.

        The rates are read from the serializer state at creation time and are
        fixed for the lifetime of the returned function.

        Args:
            direction: ``"input"`` to convert from the Asterisk rate to the
                pipeline input rate, ``"output"`` to convert from the pipeline
                output rate to the Asterisk rate.

        Returns:
            An async function that takes raw audio bytes and returns the
            resampled bytes. When both rates are equal it returns the audio
            unchanged.

        Raises:
            ValueError: If ``direction`` is neither ``"input"`` nor ``"output"``.
        """
        if direction not in ["input", "output"]:
            raise ValueError(
                f"Invalid direction for resampler: {direction}, it should be either 'input' or 'output'."
            )

        if direction == "input":
            resampler_input_rate = self._asterisk_sample_rate
            resampler_output_rate = self._pipeline_in_sample_rate
        else:
            resampler_input_rate = self._pipeline_out_sample_rate
            resampler_output_rate = self._asterisk_sample_rate

        if resampler_input_rate == resampler_output_rate:
            # No resampling needed, return a pass-through function.
            logger.warning(
                f"Dummy resampler created for [{direction}] direction, in_rate ({resampler_input_rate} Hz), out_rate ({resampler_output_rate} Hz)."
            )

            async def dummy(audio) -> bytes:
                return audio

            return dummy

        # One stateful resampler instance per direction, wrapped so callers
        # only have to pass the audio.
        resampler = create_stream_resampler()

        async def wrap_resample(audio) -> bytes:
            return await resampler.resample(
                audio, resampler_input_rate, resampler_output_rate
            )

        return wrap_resample

    ### FrameSerializer interface implementation ###

    async def setup(self, frame: StartFrame):
        """Initialize the serializer with startup configuration.

        Records the pipeline input and output sample rates used to decide
        whether resampling is needed.

        Args:
            frame: StartFrame containing initialization parameters.
        """
        self._pipeline_in_sample_rate = frame.audio_in_sample_rate
        self._pipeline_out_sample_rate = frame.audio_out_sample_rate

        if self._pipeline_in_sample_rate != self._pipeline_out_sample_rate:
            logger.warning(
                f"Pipeline input sample rate ({self._pipeline_in_sample_rate} Hz) != output sample rate ({self._pipeline_out_sample_rate} Hz). Please try to avoid resampling when possible."
            )

    async def serialize(self, frame: Frame) -> str | bytes | None:
        """Convert a frame to its representation for the Asterisk WebSocket channel.

        The handler is found by walking the frame's class hierarchy from the
        most specific class upwards and taking the first
        ``_frame_{class_name.lower()}`` method that exists, so subclasses of a
        handled frame type are serialized like their base type.

        Args:
            frame: The frame to serialize.

        Returns:
            Serialized frame data as string or bytes, or None if the frame
            type is not handled or the handler produced nothing.
        """
        handler = None
        for klass in type(frame).__mro__:
            candidate = getattr(self, f"_frame_{klass.__name__.lower()}", None)
            if callable(candidate):
                handler = candidate
                break

        if handler is None:
            logger.trace(
                f"Received unhandled frame type in Asterisk WebSocket serializer: {type(frame)}. Frame: {frame}"
            )
            return None

        # Handlers may be plain or async functions.
        result = handler(frame)
        if inspect.isawaitable(result):
            result = await result
        return cast(str | bytes | None, result)

    async def deserialize(self, data: str | bytes) -> Frame | None:
        """Convert data from Asterisk's websocket channel to a frame object.

        Binary messages are audio; text messages are events.

        Args:
            data: Serialized frame data as string or bytes.

        Returns:
            Reconstructed Frame object, or None if deserialization fails.
        """
        # Handle audio
        if isinstance(data, bytes):
            if self._pipeline_in_sample_rate == self._asterisk_sample_rate:
                logger.trace("Forwarding audio from Asterisk without resampling.")
                resampled_audio = data
            elif self._input_resampler is None:
                logger.warning(
                    "Resampling is required but input resampler is not initialized, we can't resample the audio frame."
                )
                return None
            else:
                logger.trace(
                    f"Resampling audio from Asterisk sample rate {self._asterisk_sample_rate} Hz to pipeline input sample rate {self._pipeline_in_sample_rate} Hz."
                )
                resampled_audio = await self._input_resampler(data)
                if not resampled_audio:
                    logger.trace("Resampled audio contains no data.")
                    return None

            return InputAudioRawFrame(
                audio=resampled_audio,
                num_channels=1,
                sample_rate=self._pipeline_in_sample_rate,
            )

        # Handle events
        if isinstance(data, str):
            event = self._asterisk_ws_proto.parse(data)
            if event is None:
                logger.warning(
                    f"Failed to parse Asterisk WebSocket event from data: {data}"
                )
                return None
            return self._handle_event(event)

        logger.warning(
            f"Received data of unsupported type from Asterisk WebSocket channel: {type(data)}. Data: {data}"
        )
        return None
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2026, Nikolai Shakin
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
from loguru import logger
|
|
9
|
+
import time
|
|
10
|
+
from pipecat.transports.websocket.fastapi import (
|
|
11
|
+
FastAPIWebsocketClient,
|
|
12
|
+
)
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class FlowController:
|
|
16
|
+
"""Controls the flow of audio frames sent over the WebSocket connection.
|
|
17
|
+
|
|
18
|
+
It manages a local buffer and estimates the utilization of the remote buffer
|
|
19
|
+
on the Asterisk side to prevent buffer overflow and audio under-runs. Audio
|
|
20
|
+
chunks are dispatched in batches based on configured low and high water marks,
|
|
21
|
+
ensuring smooth playback while respecting WebSocket message size limits.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
# Percentages of the remote buffer size to use as low and high water marks for flow control.
|
|
25
|
+
REMOTE_BUFFER_LOW_WATER = 0.1
|
|
26
|
+
REMOTE_BUFFER_HIGH_WATER = 0.8
|
|
27
|
+
|
|
28
|
+
# Size of the remote buffer in frames(or audio chunks) of psize. It's currently (Apr 2026) hardcoded on Asterisk side.
|
|
29
|
+
REMOTE_BUFFER_SIZE = 1000
|
|
30
|
+
|
|
31
|
+
# Minimum number of bytes to send when the remote buffer is in working range (between low and high water marks).
|
|
32
|
+
# We don't need to send data in small chunks in every tick if the buffer is loaded on the remote side.
|
|
33
|
+
# 50 frames is about 1 second of audio at 20ms ptime, so after we reached the low water mark we will send audio in batches of at least one second of audio.
|
|
34
|
+
# So with the default values we will have at least 20ms*1000*0.1 = 2 seconds of audio in the remote buffer before we start sending audio in batches,
|
|
35
|
+
# and we will send at least 1 second of audio every time we send while the remote buffer is between 10% and 80% full.
|
|
36
|
+
# If the remote buffer is above 80% full we will stop sending until it goes below 80% again.
|
|
37
|
+
# If the remote buffer is below 10% full we will send whatever we have in the local buffer without waiting for the minimum batch size to be reached,
|
|
38
|
+
# to quickly fill the remote buffer and avoid buffer under-utilization.
|
|
39
|
+
MIN_BATCH = 50
|
|
40
|
+
|
|
41
|
+
# Based on Asterisk documentation: the maximum websocket message size the underlying websocket code can handle is 65500 bytes.
|
|
42
|
+
# We need to ensure we don't exceed this limit when sending audio chunks. However, if the size is 65500 - it kills the session with the following error
|
|
43
|
+
# DEBUG[22367][C-00000050]: chan_websocket.c:1107 read_from_ws_and_queue: WebSocket/pipecat/0xfffee4009f58: WebSocket read error
|
|
44
|
+
# To be safe we use 50000 bytes, it worked in tests without issues.
|
|
45
|
+
MAX_WS_SEND = 50000
|
|
46
|
+
|
|
47
|
+
def __init__(
    self, ptime: int, psize: int, websocket_client: FastAPIWebsocketClient
):
    """Set up the buffers and water marks, then start the flow control task.

    Args:
        ptime: Audio chunk duration, in milliseconds.
        psize: Audio chunk size, in bytes.
        websocket_client: Client whose ``send`` delivers the buffered audio.
    """
    self._ptime = ptime
    self._psize = psize
    self._websocket_client = websocket_client
    self._local_buffer = bytearray()

    # Water marks are expressed in bytes: (fraction * frames) * bytes-per-frame.
    low_water_frames = self.REMOTE_BUFFER_LOW_WATER * self.REMOTE_BUFFER_SIZE
    high_water_frames = self.REMOTE_BUFFER_HIGH_WATER * self.REMOTE_BUFFER_SIZE
    self._remote_buffer_low_water = low_water_frames * self._psize
    self._remote_buffer_high_water = high_water_frames * self._psize

    # Tracked in bytes, but kept as a float so that fractional drain per tick
    # is not lost to integer rounding.
    self._remote_buffer_utilization = 0.0

    # Batch threshold converted from frames to bytes.
    self._min_batch = self.MIN_BATCH * self._psize

    # The periodic flow control loop starts running right away.
    self._flow_control = asyncio.create_task(self.flow_control())
|
|
64
|
+
|
|
65
|
+
def __call__(self, chunk: bytes) -> None:
    """Append an audio chunk to the local buffer.

    Chunks of any size are accepted; they are expected to be already sampled
    correctly, because the flow controller never alters their content.

    Args:
        chunk: The audio chunk to add to the local buffer.
    """
    pending = self._local_buffer
    pending.extend(chunk)
    logger.trace(
        f"Buffered {len(chunk)} bytes to local buffer. Local buffer size: {len(pending)} bytes."
    )
|
|
79
|
+
|
|
80
|
+
async def flow_control(self):
    """Track the remote buffer utilization and send audio whenever possible.

    Runs forever, once per ``ptime``:
    - Lowers the estimated remote buffer utilization by the amount of audio
      played out since the previous tick, measured with monotonic time rather
      than the nominal sleep duration so the estimate does not drift.
    - Below the low water mark, sends whatever is in the local buffer.
    - Between the low and high water marks, sends only once at least
      ``_min_batch`` bytes are buffered locally.
    - At or above the high water mark, sends nothing.
    """
    previous_tick = time.monotonic()
    while True:
        # Sleep for the duration of one audio chunk.
        await asyncio.sleep(self._ptime / 1000)

        now = time.monotonic()
        drained = (self._psize * 1000 / self._ptime) * (now - previous_tick)
        self._remote_buffer_utilization = max(
            0, self._remote_buffer_utilization - drained
        )
        previous_tick = now

        # Nothing buffered locally: nothing to decide on this tick.
        if len(self._local_buffer) == 0:
            continue

        # Remote buffer is running low: send whatever we have.
        if self._remote_buffer_utilization < self._remote_buffer_low_water:
            await self.send_chunks()
            continue

        # Working range: only send a full batch to avoid tiny sends on every tick.
        in_working_range = (
            self._remote_buffer_utilization < self._remote_buffer_high_water
        )
        if in_working_range and len(self._local_buffer) >= self._min_batch:
            await self.send_chunks()
        # Otherwise wait for the next tick and re-evaluate the utilization.
|
|
117
|
+
|
|
118
|
+
async def send_chunks(self):
    """Send buffered audio to the websocket (i.e. to Asterisk's remote buffer).

    Sends as many bytes from the local buffer as possible, limited by the
    headroom left below the remote high water mark and by ``MAX_WS_SEND``,
    then adds the sent byte count to the remote buffer utilization estimate.
    Does nothing when no bytes can be sent.
    """
    headroom = self._remote_buffer_high_water - self._remote_buffer_utilization
    bytes_to_send = int(min(len(self._local_buffer), headroom))
    # Ensure we don't exceed the websocket maximum message size.
    bytes_to_send = min(bytes_to_send, self.MAX_WS_SEND)
    if bytes_to_send <= 0:
        return

    # Hand the websocket an immutable ``bytes`` snapshot rather than a
    # ``bytearray`` slice: binary websocket payloads are specified as bytes.
    chunk = bytes(self._local_buffer[:bytes_to_send])
    del self._local_buffer[:bytes_to_send]

    await self._websocket_client.send(chunk)

    self._remote_buffer_utilization += len(chunk)
    logger.debug(
        f"Sent {len(chunk)} bytes to websocket. Remote buffer utilization: {self._remote_buffer_utilization:.0f} bytes, {self._remote_buffer_utilization / (self._psize * self.REMOTE_BUFFER_SIZE) * 100:.1f}%."
    )
|
|
148
|
+
|
|
149
|
+
def close(self, gracefully: bool = False):
    """Cancel the flow control task, optionally after the local buffer drains.

    The flow control task that empties the local buffer runs on the event
    loop, so a graceful close must never block that loop while waiting:
    a blocking wait there would prevent the buffer from ever draining.
    When called from a running event loop, the wait is therefore done by a
    background task and this method returns immediately. When no event loop
    is running in the calling thread, the wait is a blocking poll.

    In both cases the wait also ends if the flow control task has already
    finished, since nothing would drain the buffer any more.

    Args:
        gracefully: If True, wait for the local buffer to be sent before
            cancelling the flow control task.
    """
    task = self._flow_control
    if not task:
        return

    if not gracefully:
        task.cancel()
        return

    logger.info(
        "Gracefully closing flow controller. Waiting for local buffer to be sent..."
    )
    # One audio chunk duration gives the flow control loop a tick to send.
    poll_interval = self._ptime / 1000

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No event loop in this thread, so blocking here cannot stall the
        # flow control task.
        while len(self._local_buffer) > 0 and not task.done():
            time.sleep(poll_interval)
        task.cancel()
        return

    async def _drain_then_cancel():
        while len(self._local_buffer) > 0 and not task.done():
            await asyncio.sleep(poll_interval)
        task.cancel()

    # Keep a reference so the background task is not garbage collected early.
    self._drain_task = asyncio.create_task(_drain_then_cancel())
|
|
165
|
+
|
|
166
|
+
def drop_buffer(self):
    """Discard locally buffered audio and zero the remote utilization estimate.

    Called when an interruption/stop/cancel frame is processed so that stale
    audio is not replayed.
    """
    self._remote_buffer_utilization = 0.0
    # Empty the existing buffer in place instead of rebinding the attribute.
    del self._local_buffer[:]
|
|
@@ -0,0 +1,192 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c) 2026, Nikolai Shakin
|
|
3
|
+
#
|
|
4
|
+
# SPDX-License-Identifier: BSD-2-Clause
|
|
5
|
+
#
|
|
6
|
+
|
|
7
|
+
from fastapi import WebSocket
|
|
8
|
+
from loguru import logger
|
|
9
|
+
from pipecat.transports.websocket.fastapi import (
|
|
10
|
+
FastAPIWebsocketClient,
|
|
11
|
+
FastAPIWebsocketOutputTransport,
|
|
12
|
+
FastAPIWebsocketTransport,
|
|
13
|
+
FastAPIWebsocketParams,
|
|
14
|
+
)
|
|
15
|
+
from pipecat.processors.frame_processor import FrameDirection
|
|
16
|
+
from pipecat.frames.frames import (
|
|
17
|
+
Frame,
|
|
18
|
+
InterruptionFrame,
|
|
19
|
+
CancelFrame,
|
|
20
|
+
StopFrame,
|
|
21
|
+
InputTransportMessageFrame,
|
|
22
|
+
OutputAudioRawFrame,
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
from .flow_controller import FlowController
|
|
26
|
+
from ..serializer.serializer import AsteriskFrameSerializer, AsteriskCommandFrame
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
class AsteriskWebsocketOutputTransport(FastAPIWebsocketOutputTransport):
    """Subclass of FastAPIWebsocketOutputTransport to handle Asterisk WebSocket channel communication.

    Outgoing audio is not written straight to the websocket. It is handed to a
    FlowController, created when the MEDIA_START event arrives, which paces
    the delivery to Asterisk.
    """

    def __init__(
        self,
        transport: "AsteriskWebsocketTransport",
        client: FastAPIWebsocketClient,
        params: FastAPIWebsocketParams | None = None,
        name: str | None = None,
    ):
        """Initialize the output transport.

        Args:
            transport: The parent transport instance.
            client: The websocket client used to send data.
            params: Transport parameters. Defaults to audio in/out enabled
                with an AsteriskFrameSerializer.
            name: Optional processor name, forwarded to the parent class.
        """
        if params is None:
            params = FastAPIWebsocketParams(
                serializer=AsteriskFrameSerializer(),
                audio_in_enabled=True,
                audio_out_enabled=True,
            )
        # Forward the name so that it is not silently dropped.
        super().__init__(transport, client, params, name=name)
        self._flow_controller = None

    def _close_flow_controller(self) -> None:
        """Cancel the current flow controller task, if any, and forget it."""
        if self._flow_controller is not None:
            self._flow_controller.close()
            self._flow_controller = None

    async def cleanup(self):
        """Clean up the transport and stop the flow control task."""
        await super().cleanup()
        # The flow control loop runs forever unless it is cancelled.
        self._close_flow_controller()

    async def _media_start_handler(self, frame: InputTransportMessageFrame):
        """Handle the MEDIA_START event.

        Initializes the flow controller with the ptime and psize values from
        the MEDIA_START event data, then sends a START_MEDIA_BUFFERING command
        to Asterisk to enable audio buffering.

        Args:
            frame: The transport message frame carrying the MEDIA_START event.
        """
        try:
            ptime = int(frame.message.get("ptime", 0))
            psize = int(frame.message.get("optimal_frame_size", 0))
        except (TypeError, ValueError):
            logger.error(
                f"Non-numeric ptime or optimal_frame_size in MEDIA_START event {frame.message}. Cannot initialize flow controller."
            )
            return

        if ptime <= 0 or psize <= 0:
            logger.error(
                f"Invalid ptime ({ptime}) or psize ({psize}) in MEDIA_START event {frame.message}. Cannot initialize flow controller."
            )
            return

        # A repeated MEDIA_START must not leave the previous task running.
        self._close_flow_controller()
        self._flow_controller = FlowController(ptime, psize, self._client)

        logger.debug(
            f"Initialized flow controller with ptime={ptime} ms, psize={psize} bytes. Remote buffer low water mark: {self._flow_controller._remote_buffer_low_water} bytes, high water mark: {self._flow_controller._remote_buffer_high_water} bytes."
        )

        # Send START_MEDIA_BUFFERING to enable audio buffering on the Asterisk side.
        if self._client.is_closing or not self._client.is_connected:
            logger.warning(
                "Cannot send START_MEDIA_BUFFERING command because the WebSocket client is closing or already closed."
            )
            return
        if not self._params.serializer:
            logger.error(
                "Cannot send START_MEDIA_BUFFERING command because no serializer is set in the transport parameters."
            )
            return
        try:
            cmd = await self._params.serializer.serialize(
                AsteriskCommandFrame("START_MEDIA_BUFFERING")
            )
            if cmd:
                await self._client.send(cmd)
                logger.info(
                    "Sent START_MEDIA_BUFFERING command to Asterisk WebSocket channel to enable audio buffering."
                )
        except Exception as e:
            # Best effort: a failed command must not break frame processing.
            logger.error(
                f"{self} exception sending START_MEDIA_BUFFERING: {e.__class__.__name__} ({e})"
            )

    async def process_frame(self, frame: Frame, direction: FrameDirection):
        """Process outgoing frames.

        Interruption, cancel and stop frames drop the buffered audio.
        A MEDIA_START transport message initializes the flow controller.

        Args:
            frame: The frame to process.
            direction: The direction of frame flow in the pipeline.
        """
        await super().process_frame(frame, direction)

        if isinstance(frame, (InterruptionFrame, CancelFrame, StopFrame)):
            # Drop buffered audio to avoid replaying stale PCM.
            if self._flow_controller:
                self._flow_controller.drop_buffer()
        elif (
            isinstance(frame, InputTransportMessageFrame)
            and frame.message.get("event", None) == "MEDIA_START"
        ):
            await self._media_start_handler(frame)

    async def write_audio_frame(self, frame: OutputAudioRawFrame) -> bool:
        """Write an audio frame into the local buffer.

        Overrides the parent method: the serialized audio is passed to the
        flow controller instead of being written directly to the websocket.

        Args:
            frame: The output audio frame to write.

        Returns:
            True if the audio frame was passed to the flow controller,
            False otherwise.
        """
        if self._client.is_closing or not self._client.is_connected:
            logger.warning(
                "Cannot write audio frame because the WebSocket client is closing or already closed."
            )
            return False

        if not self._params.serializer:
            logger.error(
                "Serializer is not set in transport parameters. Cannot write audio frame."
            )
            return False

        if self._flow_controller is None:
            logger.error(
                "Flow controller is not initialized. Cannot write audio frame."
            )
            return False

        # Rebuild as a plain OutputAudioRawFrame before serializing.
        frame = OutputAudioRawFrame(
            audio=frame.audio,
            sample_rate=frame.sample_rate,
            num_channels=frame.num_channels,
        )

        try:
            payload = await self._params.serializer.serialize(frame)
            if not payload:
                logger.trace(
                    "Serializer returned None or empty payload. Cannot write audio frame."
                )
                return False
            if not isinstance(payload, bytes):
                logger.error(
                    f"Serialized audio frame is not bytes. Got {type(payload)} instead. Cannot write audio frame."
                )
                return False
            self._flow_controller(payload)
            return True
        except Exception as e:
            logger.error(f"{self} exception sending data: {e.__class__.__name__} ({e})")
            return False
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
class AsteriskWebsocketTransport(FastAPIWebsocketTransport):
    """FastAPI websocket transport specialized for the Asterisk WebSocket channel."""

    def __init__(
        self,
        websocket: WebSocket,
        params: FastAPIWebsocketParams | None = None,
        input_name: str | None = None,
        output_name: str | None = None,
    ):
        """Build the transport and install the Asterisk-aware output transport.

        Args:
            websocket: The FastAPI websocket connected to Asterisk.
            params: Transport parameters. Defaults to audio in/out enabled
                with an AsteriskFrameSerializer.
            input_name: Optional name for the input transport.
            output_name: Optional name for the output transport.
        """
        effective_params = params
        if effective_params is None:
            effective_params = FastAPIWebsocketParams(
                serializer=AsteriskFrameSerializer(),
                audio_in_enabled=True,
                audio_out_enabled=True,
            )

        super().__init__(websocket, effective_params, input_name, output_name)

        # Replace the output transport created by the parent class.
        self._output = AsteriskWebsocketOutputTransport(
            self, self._client, effective_params, name=output_name
        )
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
Metadata-Version: 2.3
|
|
2
|
+
Name: pipecat-asterisk
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: This package provides a WebSocket transport for integrating Pipecat applications with Asterisk websocket channel, enabling real-time audio streaming and signalling interaction between Asterisk and Pipecat applications.
|
|
5
|
+
Author: Nikolai Shakin
|
|
6
|
+
Author-email: Nikolai Shakin <nikolay.n.shakin@gmail.com>
|
|
7
|
+
License: BSD-2-Clause
|
|
8
|
+
Classifier: License :: OSI Approved :: BSD License
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Requires-Dist: fastapi>=0.136.1
|
|
11
|
+
Requires-Dist: pipecat-ai>=1.1.0
|
|
12
|
+
Requires-Dist: websockets>=15.0.1
|
|
13
|
+
Requires-Python: >=3.12
|
|
14
|
+
Description-Content-Type: text/markdown
|
|
15
|
+
|
|
16
|
+
# pipecat-asterisk
|
|
17
|
+
|
|
18
|
+
A [Pipecat](https://github.com/pipecat-ai/pipecat) community integration for Asterisk.
|
|
19
|
+
This repository provides a transport and frame serializer to connect your Asterisk with Pipecat pipelines.
|
|
20
|
+
|
|
21
|
+
## Features
|
|
22
|
+
|
|
23
|
+
- **`AsteriskWebsocketTransport`**: Handles raw audio streaming and lifecycle events natively with Asterisk.
|
|
24
|
+
- **`AsteriskFrameSerializer`**: Serializer to translate Asterisk websocket JSON or plain-text payloads and raw(audio) payloads into Pipecat frames.
|
|
25
|
+
- **Flow Control**: Built-in logic to manage buffer utilization between the Pipecat application and Asterisk.
|
|
26
|
+
|
|
27
|
+
## Installation
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
uv add pipecat-asterisk
|
|
31
|
+
```
|
|
32
|
+
*(Or use `pip install pipecat-asterisk` if you are using `pip` for dependency management)*
|
|
33
|
+
|
|
34
|
+
## Usage
|
|
35
|
+
|
|
36
|
+
Here is a basic example of how to integrate the Asterisk WebSocket transport into a Pipecat pipeline:
|
|
37
|
+
|
|
38
|
+
```python
|
|
39
|
+
from fastapi import FastAPI, WebSocket
|
|
40
|
+
from pipecat.pipeline.pipeline import Pipeline
|
|
41
|
+
from pipecat.pipeline.task import PipelineTask
|
|
42
|
+
from pipecat_asterisk import AsteriskWebsocketTransport
|
|
43
|
+
|
|
44
|
+
app = FastAPI()
|
|
45
|
+
|
|
46
|
+
@app.websocket("/ws")
|
|
47
|
+
async def websocket_endpoint(websocket: WebSocket):
|
|
48
|
+
await websocket.accept()
|
|
49
|
+
|
|
50
|
+
# Initialize the Asterisk Transport
|
|
51
|
+
ws_transport = AsteriskWebsocketTransport(websocket=websocket)
|
|
52
|
+
|
|
53
|
+
# Build your Pipecat pipeline
|
|
54
|
+
pipeline = Pipeline([
|
|
55
|
+
ws_transport.input(),
|
|
56
|
+
# ... other pipeline components (VAD, LLM, TTS, etc.)
|
|
57
|
+
ws_transport.output(),
|
|
58
|
+
])
|
|
59
|
+
|
|
60
|
+
task = PipelineTask(pipeline)
|
|
61
|
+
|
|
62
|
+
# Run the pipeline
|
|
63
|
+
# ...
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
## Running the Example
|
|
67
|
+
|
|
68
|
+
An example Gemini-based voice bot is provided in `examples/pipecat_asterisk/`.
|
|
69
|
+
|
|
70
|
+
### 1. Configure Asterisk
|
|
71
|
+
The `examples/pipecat_asterisk/` directory includes a `docker-compose.yml` and Asterisk configuration files in `etc/` to easily spin up a local Asterisk testing environment. After the Docker container is running you can connect any sip client to `localhost:5060` with the credentials specified in `etc/asterisk/pjsip.conf` (user: `1`, password: `1`). There are a few extensions configured in `etc/asterisk/extensions.conf` that you can use to test the bot, every extension represents a respective sampling rate.
|
|
72
|
+
|
|
73
|
+
```
|
|
74
|
+
exten = 8,1,Dial(WebSocket/pipecat/c(slin))
|
|
75
|
+
exten = 12,1,Dial(WebSocket/pipecat/c(slin12))
|
|
76
|
+
exten = 16,1,Dial(WebSocket/pipecat/c(slin16))
|
|
77
|
+
exten = 24,1,Dial(WebSocket/pipecat/c(slin24))
|
|
78
|
+
...
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
To run the Asterisk server with the provided configuration:
|
|
82
|
+
|
|
83
|
+
```bash
|
|
84
|
+
cd examples/pipecat_asterisk
|
|
85
|
+
docker-compose up -d
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### 2. Set API Keys
|
|
89
|
+
The example uses Google's Gemini for conversational AI. Create a `.env` file or export your key directly:
|
|
90
|
+
|
|
91
|
+
```bash
|
|
92
|
+
export GOOGLE_API_KEY="your-google-api-key"
|
|
93
|
+
```
|
|
94
|
+
|
|
95
|
+
### 3. Run the application
|
|
96
|
+
Run the WebSocket server:
|
|
97
|
+
```bash
|
|
98
|
+
uv sync
|
|
99
|
+
uv run examples/pipecat_asterisk/ws_server.py
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Compatibility
|
|
103
|
+
|
|
104
|
+
- Tested with **Pipecat v1.1.0**
|
|
105
|
+
- Requires **Python 3.12+**
|
|
106
|
+
|
|
107
|
+
## Internal Architecture
|
|
108
|
+
|
|
109
|
+
The transport and the serializer are designed to work with `slin` encoded audio, because Asterisk natively supports all the flavors of `slin` and Pipecat's audio frames are required to be slin-encoded.
|
|
110
|
+
If you need to use a different codec, you can transcode on the Asterisk side, it's computationally more efficient and simplifies the transport and serializer logic.
|
|
111
|
+
|
|
112
|
+
Serializer and the transport implementation are based on the [Asterisk websocket channel documentation](https://docs.asterisk.org/Configuration/Channel-Drivers/WebSocket/).
|
|
113
|
+
|
|
114
|
+
The transport supports flow control logic to manage the buffer utilization between the Pipecat application and Asterisk. This ensures that we don't overwhelm the Asterisk server with too much data at once, while also ensuring that we send data as soon as there is capacity in the remote buffer. The output transport creates an instance of the flow controller and adds serialized (and resampled if needed) audio frames to it. The flow controller then decides when to send data to Asterisk based on the current buffer utilization and the amount of data in the local buffer. The flow control logic is as follows:
|
|
115
|
+
|
|
116
|
+
### Flow control logic
|
|
117
|
+
```mermaid
|
|
118
|
+
flowchart TD
|
|
119
|
+
C{Remote buffer utilization < <br>low water}
|
|
120
|
+
C -->|yes| D{There are bytes in local buffer}
|
|
121
|
+
C -->|no| H{Remote buffer utilization > <br>high water}
|
|
122
|
+
H -->|yes| E[Skip]
|
|
123
|
+
D -->|yes| F[Send up to MAX_WS_SEND <br> bytes from local buffer]
|
|
124
|
+
D -->|no| E
|
|
125
|
+
H -->|no| I{Do we have at least <br>MIN_BATCH frames in local <br>buffer}
|
|
126
|
+
I -->|yes| F
|
|
127
|
+
I -->|no| E
|
|
128
|
+
```
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
pipecat_asterisk/__init__.py,sha256=4h0wX0LetaHTL7QqfGZbVU0v5f6iWPQ4_YxbAjhb2J0,272
|
|
2
|
+
pipecat_asterisk/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
|
+
pipecat_asterisk/serializer/__init__.py,sha256=eJMxpL0FnIPY4-Ullm6BNQXCbgslINycj-kz9iu4iaw,234
|
|
4
|
+
pipecat_asterisk/serializer/protocol.py,sha256=iEyAr4VzvL2NlZQsYdVWqbkXPVhXRVTkceFYE4sJBUU,3445
|
|
5
|
+
pipecat_asterisk/serializer/serializer.py,sha256=H3OxesEUhGdnEGGozsgroVKK-dvMJk9wBHc9Vh6cnN4,22640
|
|
6
|
+
pipecat_asterisk/transport/__init__.py,sha256=jUJ0IiK5QY3ZM4hvWbh021HBlpanKp6ceDRf5GYAnu4,238
|
|
7
|
+
pipecat_asterisk/transport/flow_controller.py,sha256=YysxpQJe2hc0trM4IqnXQ1_J6AqZu--UNFu8U-YqiZU,8774
|
|
8
|
+
pipecat_asterisk/transport/transport.py,sha256=ost-poEsTdSt_YpoLN_sm6pWwfRiPAqAsoZNtWobqa0,7239
|
|
9
|
+
pipecat_asterisk-0.1.0.dist-info/WHEEL,sha256=jROcLULcdzropX2J55opKw4UHhPFREZax2XzS-Mvpxs,80
|
|
10
|
+
pipecat_asterisk-0.1.0.dist-info/METADATA,sha256=jz_CGKpR0RI_ApvOvixpd_i8eIgqOh3fAEs_m5EapdY,5186
|
|
11
|
+
pipecat_asterisk-0.1.0.dist-info/RECORD,,
|