@framers/agentos 0.1.111 → 0.1.112
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/voice/CallManager.d.ts.map +1 -1
- package/dist/voice/CallManager.js +9 -1
- package/dist/voice/CallManager.js.map +1 -1
- package/dist/voice/MediaStreamParser.d.ts +115 -6
- package/dist/voice/MediaStreamParser.d.ts.map +1 -1
- package/dist/voice/MediaStreamParser.js +44 -0
- package/dist/voice/MediaStreamParser.js.map +1 -1
- package/dist/voice/TelephonyStreamTransport.d.ts +112 -20
- package/dist/voice/TelephonyStreamTransport.d.ts.map +1 -1
- package/dist/voice/TelephonyStreamTransport.js +136 -30
- package/dist/voice/TelephonyStreamTransport.js.map +1 -1
- package/dist/voice/parsers/PlivoMediaStreamParser.d.ts +64 -6
- package/dist/voice/parsers/PlivoMediaStreamParser.d.ts.map +1 -1
- package/dist/voice/parsers/PlivoMediaStreamParser.js +67 -6
- package/dist/voice/parsers/PlivoMediaStreamParser.js.map +1 -1
- package/dist/voice/parsers/TelnyxMediaStreamParser.d.ts +55 -8
- package/dist/voice/parsers/TelnyxMediaStreamParser.d.ts.map +1 -1
- package/dist/voice/parsers/TelnyxMediaStreamParser.js +60 -9
- package/dist/voice/parsers/TelnyxMediaStreamParser.js.map +1 -1
- package/dist/voice/parsers/TwilioMediaStreamParser.d.ts +73 -11
- package/dist/voice/parsers/TwilioMediaStreamParser.d.ts.map +1 -1
- package/dist/voice/parsers/TwilioMediaStreamParser.js +81 -12
- package/dist/voice/parsers/TwilioMediaStreamParser.js.map +1 -1
- package/dist/voice/providers/plivo.d.ts +108 -12
- package/dist/voice/providers/plivo.d.ts.map +1 -1
- package/dist/voice/providers/plivo.js +106 -9
- package/dist/voice/providers/plivo.js.map +1 -1
- package/dist/voice/providers/telnyx.d.ts +110 -20
- package/dist/voice/providers/telnyx.d.ts.map +1 -1
- package/dist/voice/providers/telnyx.js +111 -20
- package/dist/voice/providers/telnyx.js.map +1 -1
- package/dist/voice/providers/twilio.d.ts +91 -13
- package/dist/voice/providers/twilio.d.ts.map +1 -1
- package/dist/voice/providers/twilio.js +94 -14
- package/dist/voice/providers/twilio.js.map +1 -1
- package/dist/voice/twiml.d.ts +70 -12
- package/dist/voice/twiml.d.ts.map +1 -1
- package/dist/voice/twiml.js +70 -12
- package/dist/voice/twiml.js.map +1 -1
- package/dist/voice/types.d.ts +142 -15
- package/dist/voice/types.d.ts.map +1 -1
- package/dist/voice/types.js +34 -3
- package/dist/voice/types.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,12 +1,62 @@
|
|
|
1
1
|
/**
|
|
2
|
-
* @fileoverview TelephonyStreamTransport
|
|
2
|
+
* @fileoverview TelephonyStreamTransport -- bridges a telephony WebSocket media
|
|
3
3
|
* stream to the AgentOS streaming voice pipeline.
|
|
4
4
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
7
|
-
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
5
|
+
* ## Audio conversion chain
|
|
6
|
+
*
|
|
7
|
+
* ### Inbound path (phone -> pipeline)
|
|
8
|
+
*
|
|
9
|
+
* ```
|
|
10
|
+
* Provider WS frame
|
|
11
|
+
* │ (JSON string or raw binary)
|
|
12
|
+
* ▼
|
|
13
|
+
* MediaStreamParser.parseIncoming()
|
|
14
|
+
* │ (normalised MediaStreamIncoming)
|
|
15
|
+
* ▼
|
|
16
|
+
* mu-law 8 kHz bytes
|
|
17
|
+
* │ convertMulawToPcm16() -- ITU G.711 mu-law expansion table
|
|
18
|
+
* ▼
|
|
19
|
+
* Int16 PCM 8 kHz (2 bytes/sample)
|
|
20
|
+
* │ resample(8000 -> outputSampleRate) -- linear interpolation
|
|
21
|
+
* ▼
|
|
22
|
+
* Int16 PCM at outputSampleRate (default 16 kHz)
|
|
23
|
+
* │ sample / 32768 -- normalise to IEEE 754 float range
|
|
24
|
+
* ▼
|
|
25
|
+
* Float32 [-1, 1] at outputSampleRate
|
|
26
|
+
* │ emit('audio', AudioFrame)
|
|
27
|
+
* ▼
|
|
28
|
+
* Voice pipeline (VAD / STT)
|
|
29
|
+
* ```
|
|
30
|
+
*
|
|
31
|
+
* ### Outbound path (pipeline -> phone)
|
|
32
|
+
*
|
|
33
|
+
* ```
|
|
34
|
+
* EncodedAudioChunk (PCM Int16 at chunk.sampleRate)
|
|
35
|
+
* │ resample(chunk.sampleRate -> 8 kHz) -- linear interpolation
|
|
36
|
+
* ▼
|
|
37
|
+
* Int16 PCM 8 kHz
|
|
38
|
+
* │ convertPcmToMulaw8k() -- ITU G.711 mu-law compression
|
|
39
|
+
* ▼
|
|
40
|
+
* mu-law 8 kHz bytes
|
|
41
|
+
* │ MediaStreamParser.formatOutgoing()
|
|
42
|
+
* ▼
|
|
43
|
+
* Provider WS frame -- sent to caller
|
|
44
|
+
* ```
|
|
45
|
+
*
|
|
46
|
+
* ## Why 8 kHz <-> 16 kHz resampling?
|
|
47
|
+
*
|
|
48
|
+
* The PSTN (Public Switched Telephone Network) uses 8 kHz sampling (G.711).
|
|
49
|
+
* Most modern STT engines (OpenAI Whisper, Deepgram, etc.) expect 16 kHz
|
|
50
|
+
* or higher. The transport bridges this gap with simple linear interpolation,
|
|
51
|
+
* which is adequate for narrow-band voice telephony.
|
|
52
|
+
*
|
|
53
|
+
* ## State machine
|
|
54
|
+
*
|
|
55
|
+
* ```
|
|
56
|
+
* connecting ──[start msg]──> open ──[stop msg / ws.close()]──> closed
|
|
57
|
+
* │ │
|
|
58
|
+
* └──[ws.close()]──> closed └──[close()]──> closing ──[ws 'close']──> closed
|
|
59
|
+
* ```
|
|
10
60
|
*
|
|
11
61
|
* @module @framers/agentos/voice/TelephonyStreamTransport
|
|
12
62
|
*/
|
|
@@ -17,32 +67,54 @@ import { convertPcmToMulaw8k, convertMulawToPcm16 } from './telephony-audio.js';
|
|
|
17
67
|
* Adapts a telephony provider WebSocket media stream to the
|
|
18
68
|
* {@link IStreamTransport} interface consumed by the AgentOS voice pipeline.
|
|
19
69
|
*
|
|
20
|
-
* ## Inbound path (phone
|
|
70
|
+
* ## Inbound path (phone -> pipeline)
|
|
21
71
|
* 1. Provider WebSocket frames arrive as raw `Buffer` or JSON `string`.
|
|
22
72
|
* 2. {@link MediaStreamParser.parseIncoming} normalises them to
|
|
23
73
|
* {@link MediaStreamIncoming} events.
|
|
24
|
-
* 3. `'audio'` events: mu-law 8 kHz
|
|
74
|
+
* 3. `'audio'` events: mu-law 8 kHz -> Int16 PCM -> resample -> Float32 -> `'audio'` emit.
|
|
25
75
|
* 4. `'dtmf'` / `'mark'` events are re-emitted as-is for higher-layer handling.
|
|
26
76
|
* 5. `'start'` transitions the transport to `'open'` and sends the optional
|
|
27
77
|
* connection acknowledgment from the parser.
|
|
28
78
|
* 6. `'stop'` or WebSocket close transitions to `'closed'` and emits `'close'`.
|
|
29
79
|
*
|
|
30
|
-
* ## Outbound path (pipeline
|
|
80
|
+
* ## Outbound path (pipeline -> phone)
|
|
31
81
|
* 1. {@link sendAudio} receives an {@link EncodedAudioChunk} (PCM Int16 format assumed).
|
|
32
|
-
* 2. Chunk is resampled from `chunk.sampleRate`
|
|
82
|
+
* 2. Chunk is resampled from `chunk.sampleRate` -> 8 kHz via linear interpolation.
|
|
33
83
|
* 3. Resampled PCM is mu-law encoded via {@link convertPcmToMulaw8k}.
|
|
34
84
|
* 4. {@link MediaStreamParser.formatOutgoing} wraps the bytes for the provider.
|
|
35
85
|
* 5. The formatted payload is sent over the WebSocket.
|
|
36
86
|
*
|
|
37
|
-
*
|
|
38
|
-
* - `'audio'` ({@link AudioFrame})
|
|
39
|
-
* - `'dtmf'` (`{ digit: string; durationMs?: number }`)
|
|
40
|
-
* - `'mark'` (`{ name: string }`)
|
|
41
|
-
* - `'close'` ()
|
|
42
|
-
* - `'error'` (Error)
|
|
87
|
+
* ## Events emitted
|
|
88
|
+
* - `'audio'` ({@link AudioFrame}) -- inbound decoded audio for STT / VAD.
|
|
89
|
+
* - `'dtmf'` (`{ digit: string; durationMs?: number }`) -- caller key-press.
|
|
90
|
+
* - `'mark'` (`{ name: string }`) -- named stream marker.
|
|
91
|
+
* - `'close'` () -- transport has been fully closed.
|
|
92
|
+
* - `'error'` (Error) -- unrecoverable WebSocket or parsing error.
|
|
93
|
+
*
|
|
94
|
+
* @example
|
|
95
|
+
* ```typescript
|
|
96
|
+
* const parser = new TwilioMediaStreamParser();
|
|
97
|
+
* const transport = new TelephonyStreamTransport(ws, parser, { outputSampleRate: 16000 });
|
|
98
|
+
*
|
|
99
|
+
* transport.on('audio', (frame: AudioFrame) => {
|
|
100
|
+
* // Feed to STT engine
|
|
101
|
+
* sttEngine.pushAudio(frame.samples, frame.sampleRate);
|
|
102
|
+
* });
|
|
103
|
+
*
|
|
104
|
+
* transport.on('dtmf', ({ digit }) => {
|
|
105
|
+
* console.log(`Caller pressed: ${digit}`);
|
|
106
|
+
* });
|
|
107
|
+
* ```
|
|
43
108
|
*/
|
|
44
109
|
export class TelephonyStreamTransport extends EventEmitter {
|
|
45
|
-
/**
|
|
110
|
+
/**
|
|
111
|
+
* Current connection lifecycle state.
|
|
112
|
+
*
|
|
113
|
+
* - `connecting` -- WebSocket is open but the provider's `start` event has not arrived yet.
|
|
114
|
+
* - `open` -- Stream is active; audio can be sent and received.
|
|
115
|
+
* - `closing` -- {@link close} was called; waiting for WS to finish closing.
|
|
116
|
+
* - `closed` -- Stream is fully terminated; no further I/O.
|
|
117
|
+
*/
|
|
46
118
|
get state() {
|
|
47
119
|
return this._state;
|
|
48
120
|
}
|
|
@@ -50,6 +122,12 @@ export class TelephonyStreamTransport extends EventEmitter {
|
|
|
50
122
|
// Constructor
|
|
51
123
|
// ---------------------------------------------------------------------------
|
|
52
124
|
/**
|
|
125
|
+
* Create a new telephony stream transport.
|
|
126
|
+
*
|
|
127
|
+
* Wires up WebSocket event handlers immediately. The transport starts in
|
|
128
|
+
* `'connecting'` state and transitions to `'open'` when the provider sends
|
|
129
|
+
* its `start` event through the media stream.
|
|
130
|
+
*
|
|
53
131
|
* @param ws - WebSocket-like object (must emit `'message'`, `'close'`, `'error'`
|
|
54
132
|
* and expose `send(data)` and `close(code?, reason?)` methods).
|
|
55
133
|
* @param parser - Provider-specific message parser/formatter.
|
|
@@ -61,10 +139,11 @@ export class TelephonyStreamTransport extends EventEmitter {
|
|
|
61
139
|
this.ws = ws;
|
|
62
140
|
this.parser = parser;
|
|
63
141
|
// ---------------------------------------------------------------------------
|
|
64
|
-
// IStreamTransport
|
|
142
|
+
// IStreamTransport -- identity & state
|
|
65
143
|
// ---------------------------------------------------------------------------
|
|
66
144
|
/** Stable UUID for this transport connection. */
|
|
67
145
|
this.id = randomUUID();
|
|
146
|
+
/** Internal state -- not directly assignable from outside. */
|
|
68
147
|
this._state = 'connecting';
|
|
69
148
|
// ---------------------------------------------------------------------------
|
|
70
149
|
// Private fields
|
|
@@ -81,19 +160,24 @@ export class TelephonyStreamTransport extends EventEmitter {
|
|
|
81
160
|
this.streamSid = parsed.streamSid;
|
|
82
161
|
this._state = 'open';
|
|
83
162
|
// Send connection acknowledgment if the provider requires one.
|
|
163
|
+
// Twilio expects a { event: 'connected' } message; Telnyx/Plivo do not.
|
|
84
164
|
const connMsg = this.parser.formatConnected?.(parsed.streamSid);
|
|
85
165
|
if (connMsg)
|
|
86
166
|
this.ws.send(connMsg);
|
|
87
167
|
break;
|
|
88
168
|
}
|
|
89
169
|
case 'audio': {
|
|
90
|
-
// mu-law 8 kHz
|
|
170
|
+
// Step 1: mu-law 8 kHz -> Int16 PCM buffer (2 bytes per sample).
|
|
91
171
|
const pcm16Buf = convertMulawToPcm16(parsed.payload);
|
|
92
|
-
//
|
|
172
|
+
// Step 2: Reinterpret the Node.js Buffer as a typed Int16Array.
|
|
173
|
+
// Uses the same underlying ArrayBuffer (zero-copy) thanks to
|
|
174
|
+
// Buffer's offset/length alignment guarantees.
|
|
93
175
|
const int16 = new Int16Array(pcm16Buf.buffer, pcm16Buf.byteOffset, pcm16Buf.byteLength / 2);
|
|
94
|
-
// Resample 8 kHz
|
|
176
|
+
// Step 3: Resample 8 kHz -> outputSampleRate (typically 16 kHz).
|
|
177
|
+
// Linear interpolation is good enough for narrow-band telephony voice.
|
|
95
178
|
const resampled = this.resample(int16, 8000, this.outputSampleRate);
|
|
96
|
-
// Normalise Int16
|
|
179
|
+
// Step 4: Normalise Int16 range [-32768, 32767] to Float32 [-1.0, 1.0].
|
|
180
|
+
// Division by 32768 (not 32767) matches the WebAudio / Whisper convention.
|
|
97
181
|
const float32 = new Float32Array(resampled.length);
|
|
98
182
|
for (let i = 0; i < resampled.length; i++) {
|
|
99
183
|
float32[i] = resampled[i] / 32768;
|
|
@@ -107,6 +191,8 @@ export class TelephonyStreamTransport extends EventEmitter {
|
|
|
107
191
|
break;
|
|
108
192
|
}
|
|
109
193
|
case 'dtmf':
|
|
194
|
+
// Relay DTMF as a separate event so higher-layer IVR/menu logic
|
|
195
|
+
// can react without parsing audio frames.
|
|
110
196
|
this.emit('dtmf', { digit: parsed.digit, durationMs: parsed.durationMs });
|
|
111
197
|
break;
|
|
112
198
|
case 'stop':
|
|
@@ -114,10 +200,14 @@ export class TelephonyStreamTransport extends EventEmitter {
|
|
|
114
200
|
this.emit('close');
|
|
115
201
|
break;
|
|
116
202
|
case 'mark':
|
|
203
|
+
// Marks correlate outbound audio playback with application events
|
|
204
|
+
// (e.g., "TTS utterance finished playing to the caller").
|
|
117
205
|
this.emit('mark', { name: parsed.name });
|
|
118
206
|
break;
|
|
119
207
|
}
|
|
120
208
|
});
|
|
209
|
+
// Handle unexpected WS closure (network drop, server-side disconnect).
|
|
210
|
+
// Guard against double-firing if we already processed a 'stop' event.
|
|
121
211
|
this.ws.on('close', () => {
|
|
122
212
|
if (this._state !== 'closed') {
|
|
123
213
|
this._state = 'closed';
|
|
@@ -127,7 +217,7 @@ export class TelephonyStreamTransport extends EventEmitter {
|
|
|
127
217
|
this.ws.on('error', (err) => this.emit('error', err));
|
|
128
218
|
}
|
|
129
219
|
// ---------------------------------------------------------------------------
|
|
130
|
-
// IStreamTransport
|
|
220
|
+
// IStreamTransport -- outbound methods
|
|
131
221
|
// ---------------------------------------------------------------------------
|
|
132
222
|
/**
|
|
133
223
|
* Send synthesised audio to the caller.
|
|
@@ -136,6 +226,9 @@ export class TelephonyStreamTransport extends EventEmitter {
|
|
|
136
226
|
* signed 16-bit little-endian PCM samples at `chunk.sampleRate`. The audio
|
|
137
227
|
* is resampled to 8 kHz, mu-law encoded, and forwarded via the parser.
|
|
138
228
|
*
|
|
229
|
+
* No-op if the transport is not in the `'open'` state (e.g., before the
|
|
230
|
+
* provider's `start` event or after the stream has closed).
|
|
231
|
+
*
|
|
139
232
|
* @param chunk - Encoded audio chunk from the TTS pipeline.
|
|
140
233
|
*/
|
|
141
234
|
async sendAudio(chunk) {
|
|
@@ -143,12 +236,13 @@ export class TelephonyStreamTransport extends EventEmitter {
|
|
|
143
236
|
return;
|
|
144
237
|
// Interpret raw bytes as Int16 samples.
|
|
145
238
|
const int16 = new Int16Array(chunk.audio.buffer, chunk.audio.byteOffset, chunk.audio.byteLength / 2);
|
|
146
|
-
// Resample to 8 kHz,
|
|
239
|
+
// Resample to 8 kHz first, then encode to mu-law.
|
|
240
|
+
// We pre-resample so convertPcmToMulaw8k's internal resampler sees 8 kHz
|
|
241
|
+
// input and acts as a no-op, avoiding a redundant second pass.
|
|
147
242
|
const resampled8k = this.resample(int16, chunk.sampleRate, 8000);
|
|
148
243
|
const pcm8kBuf = Buffer.from(resampled8k.buffer, resampled8k.byteOffset, resampled8k.byteLength);
|
|
149
|
-
// Encode to mu-law.
|
|
150
|
-
//
|
|
151
|
-
// the function's internal resampler is a no-op.
|
|
244
|
+
// Encode to mu-law (ITU G.711). Pass sampleRate=8000 so the function's
|
|
245
|
+
// internal resampler is a no-op.
|
|
152
246
|
const mulaw = convertPcmToMulaw8k(pcm8kBuf, 8000);
|
|
153
247
|
const formatted = this.parser.formatOutgoing(mulaw, this.streamSid);
|
|
154
248
|
this.ws.send(formatted);
|
|
@@ -156,6 +250,8 @@ export class TelephonyStreamTransport extends EventEmitter {
|
|
|
156
250
|
/**
|
|
157
251
|
* Send a JSON control message over the WebSocket.
|
|
158
252
|
*
|
|
253
|
+
* No-op if the transport is not in the `'open'` state.
|
|
254
|
+
*
|
|
159
255
|
* @param message - Server-to-client pipeline protocol message.
|
|
160
256
|
*/
|
|
161
257
|
async sendControl(message) {
|
|
@@ -166,6 +262,10 @@ export class TelephonyStreamTransport extends EventEmitter {
|
|
|
166
262
|
/**
|
|
167
263
|
* Initiate graceful closure of the transport.
|
|
168
264
|
*
|
|
265
|
+
* Sets state to `'closing'` and delegates to the underlying WebSocket's
|
|
266
|
+
* `close()` method. The actual transition to `'closed'` happens when the
|
|
267
|
+
* WebSocket's `'close'` event fires.
|
|
268
|
+
*
|
|
169
269
|
* @param code - Optional WebSocket close code (default 1000).
|
|
170
270
|
* @param reason - Optional human-readable close reason.
|
|
171
271
|
*/
|
|
@@ -179,9 +279,13 @@ export class TelephonyStreamTransport extends EventEmitter {
|
|
|
179
279
|
/**
|
|
180
280
|
* Linear interpolation resampler for 16-bit signed PCM.
|
|
181
281
|
*
|
|
182
|
-
* Not studio-quality but sufficient for narrow-band voice telephony
|
|
183
|
-
*
|
|
184
|
-
*
|
|
282
|
+
* Not studio-quality but sufficient for narrow-band voice telephony where
|
|
283
|
+
* the source material is already limited to ~3.4 kHz bandwidth by the PSTN.
|
|
284
|
+
* A higher-quality sinc interpolator would add latency and complexity with
|
|
285
|
+
* negligible perceptual improvement at telephone bandwidths.
|
|
286
|
+
*
|
|
287
|
+
* The output length is computed as `round(input.length * toRate / fromRate)`
|
|
288
|
+
* to avoid cumulative rounding drift across many small frames.
|
|
185
289
|
*
|
|
186
290
|
* @param input - Source samples as a signed 16-bit integer array.
|
|
187
291
|
* @param fromRate - Sample rate of the input, in Hz.
|
|
@@ -189,6 +293,7 @@ export class TelephonyStreamTransport extends EventEmitter {
|
|
|
189
293
|
* @returns A new Int16Array at `toRate`.
|
|
190
294
|
*/
|
|
191
295
|
resample(input, fromRate, toRate) {
|
|
296
|
+
// No-op fast path: avoid allocation when rates match.
|
|
192
297
|
if (fromRate === toRate)
|
|
193
298
|
return input;
|
|
194
299
|
const ratio = fromRate / toRate;
|
|
@@ -198,6 +303,7 @@ export class TelephonyStreamTransport extends EventEmitter {
|
|
|
198
303
|
const srcIdx = i * ratio;
|
|
199
304
|
const idx = Math.floor(srcIdx);
|
|
200
305
|
const frac = srcIdx - idx;
|
|
306
|
+
// Linear interpolation between adjacent samples.
|
|
201
307
|
const a = input[idx] ?? 0;
|
|
202
308
|
const b = input[Math.min(idx + 1, input.length - 1)] ?? 0;
|
|
203
309
|
output[i] = Math.round(a + frac * (b - a));
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"TelephonyStreamTransport.js","sourceRoot":"","sources":["../../src/voice/TelephonyStreamTransport.ts"],"names":[],"mappings":"AAAA
|
|
1
|
+
{"version":3,"file":"TelephonyStreamTransport.js","sourceRoot":"","sources":["../../src/voice/TelephonyStreamTransport.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA6DG;AAEH,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAC3C,OAAO,EAAE,UAAU,EAAE,MAAM,aAAa,CAAC;AAEzC,OAAO,EAAE,mBAAmB,EAAE,mBAAmB,EAAE,MAAM,sBAAsB,CAAC;AAwBhF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GA0CG;AACH,MAAM,OAAO,wBAAyB,SAAQ,YAAY;IAWxD;;;;;;;OAOG;IACH,IAAI,KAAK;QACP,OAAO,IAAI,CAAC,MAAM,CAAC;IACrB,CAAC;IAYD,8EAA8E;IAC9E,cAAc;IACd,8EAA8E;IAE9E;;;;;;;;;;;OAWG;IACH,YACmB,EAAO,EAAE,8DAA8D;IACvE,MAAyB,EAC1C,MAAuC;QAEvC,KAAK,EAAE,CAAC;QAJS,OAAE,GAAF,EAAE,CAAK;QACP,WAAM,GAAN,MAAM,CAAmB;QAlD5C,8EAA8E;QAC9E,uCAAuC;QACvC,8EAA8E;QAE9E,iDAAiD;QACxC,OAAE,GAAW,UAAU,EAAE,CAAC;QAEnC,8DAA8D;QACtD,WAAM,GAAiD,YAAY,CAAC;QAc5E,8EAA8E;QAC9E,iBAAiB;QACjB,8EAA8E;QAE9E,2EAA2E;QACnE,cAAS,GAAkB,IAAI,CAAC;QA2BtC,IAAI,CAAC,gBAAgB,GAAG,MAAM,EAAE,gBAAgB,IAAI,KAAK,CAAC;QAE1D,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,SAAS,EAAE,CAAC,IAAqB,EAAE,EAAE;YAC9C,MAAM,MAAM,GAAG,IAAI,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;YAC/C,IAAI,CAAC,MAAM;gBAAE,OAAO;YAEpB,QAAQ,MAAM,CAAC,IAAI,EAAE,CAAC;gBACpB,KAAK,OAAO,CAAC,CAAC,CAAC;oBACb,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,CAAC;oBAClC,IAAI,CAAC,MAAM,GAAG,MAAM,CAAC;oBACrB,+DAA+D;oBAC/D,wEAAwE;oBACxE,MAAM,OAAO,GAAG,IAAI,CAAC,MAAM,CAAC,eAAe,EAAE,CAAC,MAAM,CAAC,SAAS,CAAC,CAAC;oBAChE,IAAI,OAAO;wBAAE,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;oBACnC,MAAM;gBACR,CAAC;gBAED,KAAK,OAAO,CAAC,CAAC,CAAC;oBACb,iEAAiE;oBACjE,MAAM,QAAQ,GAAG,mBAAmB,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC;oBAErD,gEAAgE;oBAChE,6DAA6D;oBAC7D,+CAA+C;oBAC/C,MAAM,KAAK,GAAG,IAAI,UAAU,CAC1B,QAAQ,CAAC,MAAM,EACf,QAAQ,CAAC,UAAU,EACnB,QAAQ,CAAC,UAAU,GAAG,CAAC,CACxB,CAAC;oBAEF,iEAAiE;oBACjE,uEAAuE;oBACvE,MAAM,SAAS,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,IAAI,EAAE,IAAI,CAAC,gBAAgB,CAAC,CAAC;oBAEpE,wEAAwE;oBACxE,2EAA2E;oBAC3E,MAAM,OAAO,GAAG,IAAI,YAAY,CAAC,SAAS,CAAC,MAAM,CAAC,CAAC;oBACnD,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,CAAC,M
AAM,EAAE,CAAC,EAAE,EAAE,CAAC;wBAC1C,OAAO,CAAC,CAAC,CAAC,GAAG,SAAS,CAAC,CAAC,CAAC,GAAG,KAAK,CAAC;oBACpC,CAAC;oBAED,MAAM,KAAK,GAAe;wBACxB,OAAO,EAAE,OAAO;wBAChB,UAAU,EAAE,IAAI,CAAC,gBAAgB;wBACjC,SAAS,EAAE,IAAI,CAAC,GAAG,EAAE;qBACtB,CAAC;oBACF,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,KAAK,CAAC,CAAC;oBAC1B,MAAM;gBACR,CAAC;gBAED,KAAK,MAAM;oBACT,gEAAgE;oBAChE,0CAA0C;oBAC1C,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,KAAK,EAAE,MAAM,CAAC,KAAK,EAAE,UAAU,EAAE,MAAM,CAAC,UAAU,EAAE,CAAC,CAAC;oBAC1E,MAAM;gBAER,KAAK,MAAM;oBACT,IAAI,CAAC,MAAM,GAAG,QAAQ,CAAC;oBACvB,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;oBACnB,MAAM;gBAER,KAAK,MAAM;oBACT,kEAAkE;oBAClE,0DAA0D;oBAC1D,IAAI,CAAC,IAAI,CAAC,MAAM,EAAE,EAAE,IAAI,EAAE,MAAM,CAAC,IAAI,EAAE,CAAC,CAAC;oBACzC,MAAM;YACV,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,uEAAuE;QACvE,sEAAsE;QACtE,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,GAAG,EAAE;YACvB,IAAI,IAAI,CAAC,MAAM,KAAK,QAAQ,EAAE,CAAC;gBAC7B,IAAI,CAAC,MAAM,GAAG,QAAQ,CAAC;gBACvB,IAAI,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;YACrB,CAAC;QACH,CAAC,CAAC,CAAC;QAEH,IAAI,CAAC,EAAE,CAAC,EAAE,CAAC,OAAO,EAAE,CAAC,GAAU,EAAE,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,OAAO,EAAE,GAAG,CAAC,CAAC,CAAC;IAC/D,CAAC;IAED,8EAA8E;IAC9E,uCAAuC;IACvC,8EAA8E;IAE9E;;;;;;;;;;;OAWG;IACH,KAAK,CAAC,SAAS,CAAC,KAAwB;QACtC,IAAI,CAAC,IAAI,CAAC,SAAS,IAAI,IAAI,CAAC,MAAM,KAAK,MAAM;YAAE,OAAO;QAEtD,wCAAwC;QACxC,MAAM,KAAK,GAAG,IAAI,UAAU,CAC1B,KAAK,CAAC,KAAK,CAAC,MAAM,EAClB,KAAK,CAAC,KAAK,CAAC,UAAU,EACtB,KAAK,CAAC,KAAK,CAAC,UAAU,GAAG,CAAC,CAC3B,CAAC;QAEF,kDAAkD;QAClD,yEAAyE;QACzE,+DAA+D;QAC/D,MAAM,WAAW,GAAG,IAAI,CAAC,QAAQ,CAAC,KAAK,EAAE,KAAK,CAAC,UAAU,EAAE,IAAI,CAAC,CAAC;QACjE,MAAM,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,MAAM,EAAE,WAAW,CAAC,UAAU,EAAE,WAAW,CAAC,UAAU,CAAC,CAAC;QAEjG,uEAAuE;QACvE,iCAAiC;QACjC,MAAM,KAAK,GAAG,mBAAmB,CAAC,QAAQ,EAAE,IAAI,CAAC,CAAC;QAElD,MAAM,SAAS,GAAG,IAAI,CAAC,MAAM,CAAC,cAAc,CAAC,KAAK,EAAE,IAAI,CAAC,SAAS,CAAC,CAAC;QACpE,IAAI,CAAC,EAAE,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;IAC1B,CAAC;IAED;;;;;;OAMG;IACH,KAAK,CAAC,WAAW,CAAC,OAA0B;QAC1C,IAAI,IAAI,CAAC,MAAM,KAAK,MAAM;YAAE,OAAO;QACnC,IAAI,
CAAC,EAAE,CAAC,IAAI,CAAC,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;IACxC,CAAC;IAED;;;;;;;;;OASG;IACH,KAAK,CAAC,IAAa,EAAE,MAAe;QAClC,IAAI,CAAC,MAAM,GAAG,SAAS,CAAC;QACxB,IAAI,CAAC,EAAE,CAAC,KAAK,CAAC,IAAI,EAAE,MAAM,CAAC,CAAC;IAC9B,CAAC;IAED,8EAA8E;IAC9E,kBAAkB;IAClB,8EAA8E;IAE9E;;;;;;;;;;;;;;;OAeG;IACK,QAAQ,CAAC,KAAiB,EAAE,QAAgB,EAAE,MAAc;QAClE,sDAAsD;QACtD,IAAI,QAAQ,KAAK,MAAM;YAAE,OAAO,KAAK,CAAC;QAEtC,MAAM,KAAK,GAAG,QAAQ,GAAG,MAAM,CAAC;QAChC,MAAM,SAAS,GAAG,IAAI,CAAC,KAAK,CAAC,KAAK,CAAC,MAAM,GAAG,KAAK,CAAC,CAAC;QACnD,MAAM,MAAM,GAAG,IAAI,UAAU,CAAC,SAAS,CAAC,CAAC;QAEzC,KAAK,IAAI,CAAC,GAAG,CAAC,EAAE,CAAC,GAAG,SAAS,EAAE,CAAC,EAAE,EAAE,CAAC;YACnC,MAAM,MAAM,GAAG,CAAC,GAAG,KAAK,CAAC;YACzB,MAAM,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,CAAC,CAAC;YAC/B,MAAM,IAAI,GAAG,MAAM,GAAG,GAAG,CAAC;YAC1B,iDAAiD;YACjD,MAAM,CAAC,GAAG,KAAK,CAAC,GAAG,CAAC,IAAI,CAAC,CAAC;YAC1B,MAAM,CAAC,GAAG,KAAK,CAAC,IAAI,CAAC,GAAG,CAAC,GAAG,GAAG,CAAC,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;YAC1D,MAAM,CAAC,CAAC,CAAC,GAAG,IAAI,CAAC,KAAK,CAAC,CAAC,GAAG,IAAI,GAAG,CAAC,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC;QAC7C,CAAC;QAED,OAAO,MAAM,CAAC;IAChB,CAAC;CACF"}
|
|
@@ -1,9 +1,60 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Plivo Audio Stream WebSocket parser.
|
|
3
|
+
*
|
|
4
|
+
* ## Plivo Audio Stream protocol
|
|
5
|
+
*
|
|
6
|
+
* Plivo's bidirectional Audio Stream (triggered by the `<Stream>` XML element)
|
|
7
|
+
* sends JSON-encoded messages over WebSocket for stream lifecycle and audio data.
|
|
8
|
+
*
|
|
9
|
+
* ### Inbound message shapes
|
|
10
|
+
*
|
|
11
|
+
* ```
|
|
12
|
+
* ┌─────────────────────────────────────────────────────────────────────┐
|
|
13
|
+
* │ event: "start" │
|
|
14
|
+
* │ stream_id: "s-xxx" │
|
|
15
|
+
* │ call_uuid: "u-xxx" │
|
|
16
|
+
* ├─────────────────────────────────────────────────────────────────────┤
|
|
17
|
+
* │ event: "media" │
|
|
18
|
+
* │ stream_id: "s-xxx" │
|
|
19
|
+
* │ media: { payload: "<base64 mu-law audio>" } │
|
|
20
|
+
* ├─────────────────────────────────────────────────────────────────────┤
|
|
21
|
+
* │ event: "stop" │
|
|
22
|
+
* │ stream_id: "s-xxx" │
|
|
23
|
+
* └─────────────────────────────────────────────────────────────────────┘
|
|
24
|
+
* ```
|
|
25
|
+
*
|
|
26
|
+
* ### Outbound `playAudio` format
|
|
27
|
+
*
|
|
28
|
+
* To send audio back to the caller, the server sends a JSON `playAudio` event:
|
|
29
|
+
* ```json
|
|
30
|
+
* { "event": "playAudio", "media": { "payload": "<base64 mu-law audio>" } }
|
|
31
|
+
* ```
|
|
32
|
+
*
|
|
33
|
+
* Note: unlike Twilio, Plivo's outbound format does NOT include a `streamSid`
|
|
34
|
+
* or `stream_id` field -- the audio is implicitly routed to the caller on the
|
|
35
|
+
* same WebSocket connection.
|
|
36
|
+
*
|
|
37
|
+
* ### Differences from Twilio and Telnyx
|
|
38
|
+
*
|
|
39
|
+
* - **No DTMF over media stream**: Plivo delivers DTMF via `<GetDigits>`
|
|
40
|
+
* XML callback webhooks (as a `Digits` POST parameter), not over the
|
|
41
|
+
* WebSocket stream.
|
|
42
|
+
* - **No outbound track filtering**: Plivo does not echo outbound audio back
|
|
43
|
+
* on the stream, so no `track` field filtering is needed.
|
|
44
|
+
* - **No connection acknowledgment**: Plivo does not require a `connected`
|
|
45
|
+
* handshake message after the WebSocket opens.
|
|
46
|
+
* - **Uses `call_uuid`**: Plivo's call identifier field is `call_uuid`
|
|
47
|
+
* (vs. Twilio's `callSid` and Telnyx's `call_control_id`).
|
|
48
|
+
*
|
|
49
|
+
* @see {@link https://www.plivo.com/docs/voice/xml/stream}
|
|
50
|
+
* @module @framers/agentos/voice/parsers/PlivoMediaStreamParser
|
|
51
|
+
*/
|
|
1
52
|
import type { MediaStreamParser, MediaStreamIncoming } from '../MediaStreamParser.js';
|
|
2
53
|
/**
|
|
3
54
|
* Parses the Plivo Audio Stream WebSocket protocol.
|
|
4
55
|
*
|
|
5
56
|
* Plivo sends JSON-encoded messages for stream lifecycle events (`start`,
|
|
6
|
-
* `stop`) and audio chunks (`media`).
|
|
57
|
+
* `stop`) and audio chunks (`media`). The audio payload is base64-encoded
|
|
7
58
|
* mu-law PCM, delivered in a `payload` field inside the `media` object.
|
|
8
59
|
*
|
|
9
60
|
* Outgoing audio is wrapped in a `playAudio` JSON envelope, which is the
|
|
@@ -17,10 +68,15 @@ export declare class PlivoMediaStreamParser implements MediaStreamParser {
|
|
|
17
68
|
* Parse a raw WebSocket frame from Plivo's audio stream.
|
|
18
69
|
*
|
|
19
70
|
* Supported Plivo event types:
|
|
20
|
-
* - `start`
|
|
71
|
+
* - `start` -- stream established; `stream_id` maps to `streamSid`,
|
|
21
72
|
* `call_uuid` maps to `callSid`.
|
|
22
|
-
* - `media`
|
|
23
|
-
*
|
|
73
|
+
* - `media` -- audio chunk; `media.payload` contains base64-encoded mu-law
|
|
74
|
+
* PCM bytes.
|
|
75
|
+
* - `stop` -- stream ended (call terminated or stream explicitly closed).
|
|
76
|
+
*
|
|
77
|
+
* Any other event type is silently dropped by returning `null`. Malformed
|
|
78
|
+
* JSON or messages missing required fields (`event`, `stream_id`) also
|
|
79
|
+
* return `null`.
|
|
24
80
|
*
|
|
25
81
|
* @param data - Raw WebSocket frame payload (JSON string or Buffer from Plivo).
|
|
26
82
|
* @returns Normalised {@link MediaStreamIncoming} event, or `null` for
|
|
@@ -31,12 +87,14 @@ export declare class PlivoMediaStreamParser implements MediaStreamParser {
|
|
|
31
87
|
* Encode mu-law audio for transmission back to Plivo.
|
|
32
88
|
*
|
|
33
89
|
* Plivo requires audio to be base64-encoded and wrapped in a `playAudio`
|
|
34
|
-
* JSON envelope.
|
|
90
|
+
* JSON envelope. Unlike Twilio, the `streamSid` / `stream_id` is NOT
|
|
91
|
+
* included in the outbound message -- Plivo implicitly routes the audio
|
|
92
|
+
* to the caller on the same WebSocket connection.
|
|
35
93
|
*
|
|
36
94
|
* @param audio - Raw mu-law PCM bytes to send to the caller.
|
|
37
95
|
* @param _streamSid - Unused by Plivo's `playAudio` format (accepted for
|
|
38
96
|
* interface parity with other parsers).
|
|
39
|
-
* @returns JSON string
|
|
97
|
+
* @returns JSON string: `{ event: 'playAudio', media: { payload: '<base64>' } }`
|
|
40
98
|
*/
|
|
41
99
|
formatOutgoing(audio: Buffer, _streamSid: string): string;
|
|
42
100
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"PlivoMediaStreamParser.d.ts","sourceRoot":"","sources":["../../../src/voice/parsers/PlivoMediaStreamParser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAEtF;;;;;;;;;;;;GAYG;AACH,qBAAa,sBAAuB,YAAW,iBAAiB;IAC9D
|
|
1
|
+
{"version":3,"file":"PlivoMediaStreamParser.d.ts","sourceRoot":"","sources":["../../../src/voice/parsers/PlivoMediaStreamParser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkDG;AAEH,OAAO,KAAK,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAEtF;;;;;;;;;;;;GAYG;AACH,qBAAa,sBAAuB,YAAW,iBAAiB;IAC9D;;;;;;;;;;;;;;;;;OAiBG;IACH,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,mBAAmB,GAAG,IAAI;IAwDhE;;;;;;;;;;;;OAYG;IACH,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,MAAM;CAM1D"}
|
|
@@ -1,8 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Plivo Audio Stream WebSocket parser.
|
|
3
|
+
*
|
|
4
|
+
* ## Plivo Audio Stream protocol
|
|
5
|
+
*
|
|
6
|
+
* Plivo's bidirectional Audio Stream (triggered by the `<Stream>` XML element)
|
|
7
|
+
* sends JSON-encoded messages over WebSocket for stream lifecycle and audio data.
|
|
8
|
+
*
|
|
9
|
+
* ### Inbound message shapes
|
|
10
|
+
*
|
|
11
|
+
* ```
|
|
12
|
+
* ┌─────────────────────────────────────────────────────────────────────┐
|
|
13
|
+
* │ event: "start" │
|
|
14
|
+
* │ stream_id: "s-xxx" │
|
|
15
|
+
* │ call_uuid: "u-xxx" │
|
|
16
|
+
* ├─────────────────────────────────────────────────────────────────────┤
|
|
17
|
+
* │ event: "media" │
|
|
18
|
+
* │ stream_id: "s-xxx" │
|
|
19
|
+
* │ media: { payload: "<base64 mu-law audio>" } │
|
|
20
|
+
* ├─────────────────────────────────────────────────────────────────────┤
|
|
21
|
+
* │ event: "stop" │
|
|
22
|
+
* │ stream_id: "s-xxx" │
|
|
23
|
+
* └─────────────────────────────────────────────────────────────────────┘
|
|
24
|
+
* ```
|
|
25
|
+
*
|
|
26
|
+
* ### Outbound `playAudio` format
|
|
27
|
+
*
|
|
28
|
+
* To send audio back to the caller, the server sends a JSON `playAudio` event:
|
|
29
|
+
* ```json
|
|
30
|
+
* { "event": "playAudio", "media": { "payload": "<base64 mu-law audio>" } }
|
|
31
|
+
* ```
|
|
32
|
+
*
|
|
33
|
+
* Note: unlike Twilio, Plivo's outbound format does NOT include a `streamSid`
|
|
34
|
+
* or `stream_id` field -- the audio is implicitly routed to the caller on the
|
|
35
|
+
* same WebSocket connection.
|
|
36
|
+
*
|
|
37
|
+
* ### Differences from Twilio and Telnyx
|
|
38
|
+
*
|
|
39
|
+
* - **No DTMF over media stream**: Plivo delivers DTMF via `<GetDigits>`
|
|
40
|
+
* XML callback webhooks (as a `Digits` POST parameter), not over the
|
|
41
|
+
* WebSocket stream.
|
|
42
|
+
* - **No outbound track filtering**: Plivo does not echo outbound audio back
|
|
43
|
+
* on the stream, so no `track` field filtering is needed.
|
|
44
|
+
* - **No connection acknowledgment**: Plivo does not require a `connected`
|
|
45
|
+
* handshake message after the WebSocket opens.
|
|
46
|
+
* - **Uses `call_uuid`**: Plivo's call identifier field is `call_uuid`
|
|
47
|
+
* (vs. Twilio's `callSid` and Telnyx's `call_control_id`).
|
|
48
|
+
*
|
|
49
|
+
* @see {@link https://www.plivo.com/docs/voice/xml/stream}
|
|
50
|
+
* @module @framers/agentos/voice/parsers/PlivoMediaStreamParser
|
|
51
|
+
*/
|
|
1
52
|
/**
|
|
2
53
|
* Parses the Plivo Audio Stream WebSocket protocol.
|
|
3
54
|
*
|
|
4
55
|
* Plivo sends JSON-encoded messages for stream lifecycle events (`start`,
|
|
5
|
-
* `stop`) and audio chunks (`media`).
|
|
56
|
+
* `stop`) and audio chunks (`media`). The audio payload is base64-encoded
|
|
6
57
|
* mu-law PCM, delivered in a `payload` field inside the `media` object.
|
|
7
58
|
*
|
|
8
59
|
* Outgoing audio is wrapped in a `playAudio` JSON envelope, which is the
|
|
@@ -16,10 +67,15 @@ export class PlivoMediaStreamParser {
|
|
|
16
67
|
* Parse a raw WebSocket frame from Plivo's audio stream.
|
|
17
68
|
*
|
|
18
69
|
* Supported Plivo event types:
|
|
19
|
-
* - `start`
|
|
70
|
+
* - `start` -- stream established; `stream_id` maps to `streamSid`,
|
|
20
71
|
* `call_uuid` maps to `callSid`.
|
|
21
|
-
* - `media`
|
|
22
|
-
*
|
|
72
|
+
* - `media` -- audio chunk; `media.payload` contains base64-encoded mu-law
|
|
73
|
+
* PCM bytes.
|
|
74
|
+
* - `stop` -- stream ended (call terminated or stream explicitly closed).
|
|
75
|
+
*
|
|
76
|
+
* Any other event type is silently dropped by returning `null`. Malformed
|
|
77
|
+
* JSON or messages missing required fields (`event`, `stream_id`) also
|
|
78
|
+
* return `null`.
|
|
23
79
|
*
|
|
24
80
|
* @param data - Raw WebSocket frame payload (JSON string or Buffer from Plivo).
|
|
25
81
|
* @returns Normalised {@link MediaStreamIncoming} event, or `null` for
|
|
@@ -35,12 +91,15 @@ export class PlivoMediaStreamParser {
|
|
|
35
91
|
return null;
|
|
36
92
|
}
|
|
37
93
|
const event = msg['event'];
|
|
94
|
+
// Plivo uses `stream_id` as the stream identifier (same naming as Telnyx).
|
|
38
95
|
const streamSid = msg['stream_id'];
|
|
39
96
|
if (!event || !streamSid) {
|
|
40
97
|
return null;
|
|
41
98
|
}
|
|
42
99
|
switch (event) {
|
|
43
100
|
case 'start': {
|
|
101
|
+
// Plivo uses `call_uuid` as its unique call identifier,
|
|
102
|
+
// different from Twilio's `callSid` and Telnyx's `call_control_id`.
|
|
44
103
|
const callSid = msg['call_uuid'] ?? '';
|
|
45
104
|
const result = {
|
|
46
105
|
type: 'start',
|
|
@@ -75,12 +134,14 @@ export class PlivoMediaStreamParser {
|
|
|
75
134
|
* Encode mu-law audio for transmission back to Plivo.
|
|
76
135
|
*
|
|
77
136
|
* Plivo requires audio to be base64-encoded and wrapped in a `playAudio`
|
|
78
|
-
* JSON envelope.
|
|
137
|
+
* JSON envelope. Unlike Twilio, the `streamSid` / `stream_id` is NOT
|
|
138
|
+
* included in the outbound message -- Plivo implicitly routes the audio
|
|
139
|
+
* to the caller on the same WebSocket connection.
|
|
79
140
|
*
|
|
80
141
|
* @param audio - Raw mu-law PCM bytes to send to the caller.
|
|
81
142
|
* @param _streamSid - Unused by Plivo's `playAudio` format (accepted for
|
|
82
143
|
* interface parity with other parsers).
|
|
83
|
-
* @returns JSON string
|
|
144
|
+
* @returns JSON string: `{ event: 'playAudio', media: { payload: '<base64>' } }`
|
|
84
145
|
*/
|
|
85
146
|
formatOutgoing(audio, _streamSid) {
|
|
86
147
|
return JSON.stringify({
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"PlivoMediaStreamParser.js","sourceRoot":"","sources":["../../../src/voice/parsers/PlivoMediaStreamParser.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"PlivoMediaStreamParser.js","sourceRoot":"","sources":["../../../src/voice/parsers/PlivoMediaStreamParser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkDG;AAIH;;;;;;;;;;;;GAYG;AACH,MAAM,OAAO,sBAAsB;IACjC;;;;;;;;;;;;;;;;;OAiBG;IACH,aAAa,CAAC,IAAqB;QACjC,MAAM,GAAG,GAAG,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEpE,IAAI,GAA4B,CAAC;QACjC,IAAI,CAAC;YACH,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAA4B,CAAC;QACnD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAuB,CAAC;QACjD,2EAA2E;QAC3E,MAAM,SAAS,GAAG,GAAG,CAAC,WAAW,CAAuB,CAAC;QAEzD,IAAI,CAAC,KAAK,IAAI,CAAC,SAAS,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,QAAQ,KAAK,EAAE,CAAC;YACd,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,wDAAwD;gBACxD,oEAAoE;gBACpE,MAAM,OAAO,GAAI,GAAG,CAAC,WAAW,CAAwB,IAAI,EAAE,CAAC;gBAC/D,MAAM,MAAM,GAAwB;oBAClC,IAAI,EAAE,OAAO;oBACb,SAAS;oBACT,OAAO;iBACR,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAwC,CAAC;gBAClE,IAAI,CAAC,KAAK;oBAAE,OAAO,IAAI,CAAC;gBAExB,MAAM,UAAU,GAAG,KAAK,CAAC,SAAS,CAAuB,CAAC;gBAC1D,IAAI,CAAC,UAAU;oBAAE,OAAO,IAAI,CAAC;gBAE7B,MAAM,MAAM,GAAwB;oBAClC,IAAI,EAAE,OAAO;oBACb,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,QAAQ,CAAC;oBAC1C,SAAS;iBACV,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,KAAK,MAAM,CAAC,CAAC,CAAC;gBACZ,MAAM,MAAM,GAAwB,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;gBAChE,OAAO,MAAM,CAAC;YAChB,CAAC;YAED;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,cAAc,CAAC,KAAa,EAAE,UAAkB;QAC9C,OAAO,IAAI,CAAC,SAAS,CAAC;YACpB,KAAK,EAAE,WAAW;YAClB,KAAK,EAAE,EAAE,OAAO,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE;SAC7C,CAAC,CAAC;IACL,CAAC;CACF"}
|
|
@@ -1,15 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Telnyx media stream WebSocket parser.
|
|
3
|
+
*
|
|
4
|
+
* ## Telnyx's asymmetric protocol
|
|
5
|
+
*
|
|
6
|
+
* Telnyx uses a fundamentally different approach than Twilio for inbound vs.
|
|
7
|
+
* outbound audio on the media stream WebSocket:
|
|
8
|
+
*
|
|
9
|
+
* - **Inbound** (phone -> server): JSON-encoded messages with `event`, `stream_id`,
|
|
10
|
+
* and `media.chunk` (base64 mu-law audio) fields.
|
|
11
|
+
* - **Outbound** (server -> phone): **Raw binary** WebSocket frames containing
|
|
12
|
+
* mu-law PCM bytes directly, with no JSON envelope whatsoever.
|
|
13
|
+
*
|
|
14
|
+
* This asymmetry means {@link TelnyxMediaStreamParser.formatOutgoing} returns the `Buffer` unchanged,
|
|
15
|
+
* while {@link TelnyxMediaStreamParser.parseIncoming} parses JSON and base64-decodes the audio payload.
|
|
16
|
+
*
|
|
17
|
+
* ## Field name mapping
|
|
18
|
+
*
|
|
19
|
+
* Telnyx uses snake_case field names that differ from Twilio's conventions.
|
|
20
|
+
* This parser normalises them to the shared {@link MediaStreamIncoming} shape:
|
|
21
|
+
*
|
|
22
|
+
* | Telnyx field | Normalised field |
|
|
23
|
+
* |----------------------|-------------------|
|
|
24
|
+
* | `stream_id` | `streamSid` |
|
|
25
|
+
* | `call_control_id` | `callSid` |
|
|
26
|
+
* | `media.chunk` | `payload` (Buffer)|
|
|
27
|
+
* | `media.track` | (used for filtering, not emitted) |
|
|
28
|
+
*
|
|
29
|
+
* ## DTMF limitation
|
|
30
|
+
*
|
|
31
|
+
* Telnyx does NOT deliver DTMF events over the media stream WebSocket.
|
|
32
|
+
* DTMF key-presses arrive as `call.dtmf.received` HTTP webhook events and
|
|
33
|
+
* must be handled by {@link TelnyxVoiceProvider.parseWebhookEvent} instead.
|
|
34
|
+
*
|
|
35
|
+
* @see {@link https://developers.telnyx.com/docs/voice/media-streaming}
|
|
36
|
+
* @module @framers/agentos/voice/parsers/TelnyxMediaStreamParser
|
|
37
|
+
*/
|
|
1
38
|
import type { MediaStreamParser, MediaStreamIncoming } from '../MediaStreamParser.js';
|
|
2
39
|
/**
|
|
3
40
|
* Parses the Telnyx media stream WebSocket protocol.
|
|
4
41
|
*
|
|
5
42
|
* Telnyx sends JSON-encoded messages for stream lifecycle events (`start`,
|
|
6
|
-
* `stop`) and audio chunks (`media`).
|
|
7
|
-
* DTMF events over the media stream WebSocket
|
|
43
|
+
* `stop`) and audio chunks (`media`). Unlike Twilio, Telnyx does NOT deliver
|
|
44
|
+
* DTMF events over the media stream WebSocket -- those arrive as HTTP webhooks
|
|
8
45
|
* to a separate endpoint and must be handled outside this parser.
|
|
9
46
|
*
|
|
10
47
|
* Outgoing audio is sent as a **raw binary Buffer** (mu-law PCM bytes without
|
|
11
48
|
* any JSON envelope) because Telnyx accepts unframed binary WebSocket frames
|
|
12
|
-
* directly.
|
|
49
|
+
* directly. No explicit connection acknowledgment is needed after the
|
|
13
50
|
* handshake.
|
|
14
51
|
*
|
|
15
52
|
* @see {@link https://developers.telnyx.com/docs/voice/media-streaming}
|
|
@@ -19,11 +56,15 @@ export declare class TelnyxMediaStreamParser implements MediaStreamParser {
|
|
|
19
56
|
* Parse a raw WebSocket frame from Telnyx's media stream.
|
|
20
57
|
*
|
|
21
58
|
* Supported Telnyx event types:
|
|
22
|
-
* - `start`
|
|
59
|
+
* - `start` -- stream established; `stream_id` maps to `streamSid`,
|
|
23
60
|
* `call_control_id` maps to `callSid`.
|
|
24
|
-
* - `media`
|
|
25
|
-
* bytes; only `inbound` track frames are returned
|
|
26
|
-
*
|
|
61
|
+
* - `media` -- audio chunk; `media.chunk` field contains base64-encoded mu-law
|
|
62
|
+
* bytes; only `inbound` track frames are returned (outbound echoes are
|
|
63
|
+
* discarded to prevent feedback loops).
|
|
64
|
+
* - `stop` -- stream ended (call terminated or stream explicitly closed).
|
|
65
|
+
*
|
|
66
|
+
* Any other event type (e.g., future Telnyx additions, DTMF attempts) is
|
|
67
|
+
* silently dropped by returning `null`.
|
|
27
68
|
*
|
|
28
69
|
* @param data - Raw WebSocket frame payload (JSON string or Buffer from Telnyx).
|
|
29
70
|
* @returns Normalised {@link MediaStreamIncoming} event, or `null` for
|
|
@@ -33,7 +74,9 @@ export declare class TelnyxMediaStreamParser implements MediaStreamParser {
|
|
|
33
74
|
/**
|
|
34
75
|
* Encode mu-law audio for transmission back to Telnyx.
|
|
35
76
|
*
|
|
36
|
-
* Telnyx accepts raw binary WebSocket frames
|
|
77
|
+
* Telnyx accepts raw binary WebSocket frames -- no JSON wrapping is needed.
|
|
78
|
+
* This is the key asymmetry in Telnyx's protocol: inbound is JSON, outbound
|
|
79
|
+
* is raw binary.
|
|
37
80
|
*
|
|
38
81
|
* @param audio - Raw mu-law PCM bytes to send to the caller.
|
|
39
82
|
* @param _streamSid - Unused by Telnyx binary framing (accepted for interface
|
|
@@ -44,6 +87,10 @@ export declare class TelnyxMediaStreamParser implements MediaStreamParser {
|
|
|
44
87
|
/**
|
|
45
88
|
* No explicit connection acknowledgment is required by Telnyx.
|
|
46
89
|
*
|
|
90
|
+
* Unlike Twilio, Telnyx does not need a `connected` handshake message
|
|
91
|
+
* before it starts sending media events.
|
|
92
|
+
*
|
|
93
|
+
* @param _streamSid - Unused (accepted for interface parity).
|
|
47
94
|
* @returns Always `null`.
|
|
48
95
|
*/
|
|
49
96
|
formatConnected(_streamSid: string): null;
|