@framers/agentos 0.1.111 → 0.1.113
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/api/strategies/debate.d.ts +12 -1
- package/dist/api/strategies/debate.d.ts.map +1 -1
- package/dist/api/strategies/debate.js +41 -5
- package/dist/api/strategies/debate.js.map +1 -1
- package/dist/api/strategies/hierarchical.d.ts +15 -1
- package/dist/api/strategies/hierarchical.d.ts.map +1 -1
- package/dist/api/strategies/hierarchical.js +51 -7
- package/dist/api/strategies/hierarchical.js.map +1 -1
- package/dist/api/strategies/index.d.ts +26 -4
- package/dist/api/strategies/index.d.ts.map +1 -1
- package/dist/api/strategies/index.js +26 -4
- package/dist/api/strategies/index.js.map +1 -1
- package/dist/api/strategies/parallel.d.ts +15 -4
- package/dist/api/strategies/parallel.d.ts.map +1 -1
- package/dist/api/strategies/parallel.js +53 -16
- package/dist/api/strategies/parallel.js.map +1 -1
- package/dist/api/strategies/review-loop.d.ts +15 -1
- package/dist/api/strategies/review-loop.d.ts.map +1 -1
- package/dist/api/strategies/review-loop.js +36 -10
- package/dist/api/strategies/review-loop.js.map +1 -1
- package/dist/api/strategies/sequential.d.ts +11 -1
- package/dist/api/strategies/sequential.d.ts.map +1 -1
- package/dist/api/strategies/sequential.js +39 -8
- package/dist/api/strategies/sequential.js.map +1 -1
- package/dist/api/strategies/shared.d.ts +71 -7
- package/dist/api/strategies/shared.d.ts.map +1 -1
- package/dist/api/strategies/shared.js +89 -10
- package/dist/api/strategies/shared.js.map +1 -1
- package/dist/api/types.d.ts +54 -1
- package/dist/api/types.d.ts.map +1 -1
- package/dist/api/types.js.map +1 -1
- package/dist/memory/facade/Memory.d.ts.map +1 -1
- package/dist/memory/facade/Memory.js +8 -0
- package/dist/memory/facade/Memory.js.map +1 -1
- package/dist/memory/facade/types.d.ts +10 -0
- package/dist/memory/facade/types.d.ts.map +1 -1
- package/dist/memory/index.d.ts +6 -0
- package/dist/memory/index.d.ts.map +1 -1
- package/dist/memory/index.js +5 -0
- package/dist/memory/index.js.map +1 -1
- package/dist/memory/observation/MemoryObserver.d.ts +63 -1
- package/dist/memory/observation/MemoryObserver.d.ts.map +1 -1
- package/dist/memory/observation/MemoryObserver.js +115 -4
- package/dist/memory/observation/MemoryObserver.js.map +1 -1
- package/dist/memory/observation/ObservationCompressor.d.ts +88 -0
- package/dist/memory/observation/ObservationCompressor.d.ts.map +1 -0
- package/dist/memory/observation/ObservationCompressor.js +207 -0
- package/dist/memory/observation/ObservationCompressor.js.map +1 -0
- package/dist/memory/observation/ObservationReflector.d.ts +82 -0
- package/dist/memory/observation/ObservationReflector.d.ts.map +1 -0
- package/dist/memory/observation/ObservationReflector.js +212 -0
- package/dist/memory/observation/ObservationReflector.js.map +1 -0
- package/dist/memory/observation/temporal.d.ts +54 -0
- package/dist/memory/observation/temporal.d.ts.map +1 -0
- package/dist/memory/observation/temporal.js +115 -0
- package/dist/memory/observation/temporal.js.map +1 -0
- package/dist/orchestration/builders/VoiceNodeBuilder.d.ts +82 -25
- package/dist/orchestration/builders/VoiceNodeBuilder.d.ts.map +1 -1
- package/dist/orchestration/builders/VoiceNodeBuilder.js +86 -26
- package/dist/orchestration/builders/VoiceNodeBuilder.js.map +1 -1
- package/dist/orchestration/events/GraphEvent.d.ts +67 -5
- package/dist/orchestration/events/GraphEvent.d.ts.map +1 -1
- package/dist/orchestration/events/GraphEvent.js.map +1 -1
- package/dist/orchestration/runtime/VoiceNodeExecutor.d.ts +102 -25
- package/dist/orchestration/runtime/VoiceNodeExecutor.d.ts.map +1 -1
- package/dist/orchestration/runtime/VoiceNodeExecutor.js +133 -38
- package/dist/orchestration/runtime/VoiceNodeExecutor.js.map +1 -1
- package/dist/orchestration/runtime/VoiceTransportAdapter.d.ts +94 -32
- package/dist/orchestration/runtime/VoiceTransportAdapter.d.ts.map +1 -1
- package/dist/orchestration/runtime/VoiceTransportAdapter.js +82 -28
- package/dist/orchestration/runtime/VoiceTransportAdapter.js.map +1 -1
- package/dist/orchestration/runtime/VoiceTurnCollector.d.ts +73 -20
- package/dist/orchestration/runtime/VoiceTurnCollector.d.ts.map +1 -1
- package/dist/orchestration/runtime/VoiceTurnCollector.js +84 -23
- package/dist/orchestration/runtime/VoiceTurnCollector.js.map +1 -1
- package/dist/voice/CallManager.d.ts.map +1 -1
- package/dist/voice/CallManager.js +9 -1
- package/dist/voice/CallManager.js.map +1 -1
- package/dist/voice/MediaStreamParser.d.ts +115 -6
- package/dist/voice/MediaStreamParser.d.ts.map +1 -1
- package/dist/voice/MediaStreamParser.js +44 -0
- package/dist/voice/MediaStreamParser.js.map +1 -1
- package/dist/voice/TelephonyStreamTransport.d.ts +112 -20
- package/dist/voice/TelephonyStreamTransport.d.ts.map +1 -1
- package/dist/voice/TelephonyStreamTransport.js +136 -30
- package/dist/voice/TelephonyStreamTransport.js.map +1 -1
- package/dist/voice/parsers/PlivoMediaStreamParser.d.ts +64 -6
- package/dist/voice/parsers/PlivoMediaStreamParser.d.ts.map +1 -1
- package/dist/voice/parsers/PlivoMediaStreamParser.js +67 -6
- package/dist/voice/parsers/PlivoMediaStreamParser.js.map +1 -1
- package/dist/voice/parsers/TelnyxMediaStreamParser.d.ts +55 -8
- package/dist/voice/parsers/TelnyxMediaStreamParser.d.ts.map +1 -1
- package/dist/voice/parsers/TelnyxMediaStreamParser.js +60 -9
- package/dist/voice/parsers/TelnyxMediaStreamParser.js.map +1 -1
- package/dist/voice/parsers/TwilioMediaStreamParser.d.ts +73 -11
- package/dist/voice/parsers/TwilioMediaStreamParser.d.ts.map +1 -1
- package/dist/voice/parsers/TwilioMediaStreamParser.js +81 -12
- package/dist/voice/parsers/TwilioMediaStreamParser.js.map +1 -1
- package/dist/voice/providers/plivo.d.ts +108 -12
- package/dist/voice/providers/plivo.d.ts.map +1 -1
- package/dist/voice/providers/plivo.js +106 -9
- package/dist/voice/providers/plivo.js.map +1 -1
- package/dist/voice/providers/telnyx.d.ts +110 -20
- package/dist/voice/providers/telnyx.d.ts.map +1 -1
- package/dist/voice/providers/telnyx.js +111 -20
- package/dist/voice/providers/telnyx.js.map +1 -1
- package/dist/voice/providers/twilio.d.ts +91 -13
- package/dist/voice/providers/twilio.d.ts.map +1 -1
- package/dist/voice/providers/twilio.js +94 -14
- package/dist/voice/providers/twilio.js.map +1 -1
- package/dist/voice/twiml.d.ts +70 -12
- package/dist/voice/twiml.d.ts.map +1 -1
- package/dist/voice/twiml.js +70 -12
- package/dist/voice/twiml.js.map +1 -1
- package/dist/voice/types.d.ts +142 -15
- package/dist/voice/types.d.ts.map +1 -1
- package/dist/voice/types.js +34 -3
- package/dist/voice/types.js.map +1 -1
- package/package.json +1 -1
|
@@ -1,8 +1,59 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Plivo Audio Stream WebSocket parser.
|
|
3
|
+
*
|
|
4
|
+
* ## Plivo Audio Stream protocol
|
|
5
|
+
*
|
|
6
|
+
* Plivo's bidirectional Audio Stream (triggered by the `<Stream>` XML element)
|
|
7
|
+
* sends JSON-encoded messages over WebSocket for stream lifecycle and audio data.
|
|
8
|
+
*
|
|
9
|
+
* ### Inbound message shapes
|
|
10
|
+
*
|
|
11
|
+
* ```
|
|
12
|
+
* ┌─────────────────────────────────────────────────────────────────────┐
|
|
13
|
+
* │ event: "start" │
|
|
14
|
+
* │ stream_id: "s-xxx" │
|
|
15
|
+
* │ call_uuid: "u-xxx" │
|
|
16
|
+
* ├─────────────────────────────────────────────────────────────────────┤
|
|
17
|
+
* │ event: "media" │
|
|
18
|
+
* │ stream_id: "s-xxx" │
|
|
19
|
+
* │ media: { payload: "<base64 mu-law audio>" } │
|
|
20
|
+
* ├─────────────────────────────────────────────────────────────────────┤
|
|
21
|
+
* │ event: "stop" │
|
|
22
|
+
* │ stream_id: "s-xxx" │
|
|
23
|
+
* └─────────────────────────────────────────────────────────────────────┘
|
|
24
|
+
* ```
|
|
25
|
+
*
|
|
26
|
+
* ### Outbound `playAudio` format
|
|
27
|
+
*
|
|
28
|
+
* To send audio back to the caller, the server sends a JSON `playAudio` event:
|
|
29
|
+
* ```json
|
|
30
|
+
* { "event": "playAudio", "media": { "payload": "<base64 mu-law audio>" } }
|
|
31
|
+
* ```
|
|
32
|
+
*
|
|
33
|
+
* Note: unlike Twilio, Plivo's outbound format does NOT include a `streamSid`
|
|
34
|
+
* or `stream_id` field -- the audio is implicitly routed to the caller on the
|
|
35
|
+
* same WebSocket connection.
|
|
36
|
+
*
|
|
37
|
+
* ### Differences from Twilio and Telnyx
|
|
38
|
+
*
|
|
39
|
+
* - **No DTMF over media stream**: Plivo delivers DTMF via `<GetDigits>`
|
|
40
|
+
* XML callback webhooks (as a `Digits` POST parameter), not over the
|
|
41
|
+
* WebSocket stream.
|
|
42
|
+
* - **No outbound track filtering**: Plivo does not echo outbound audio back
|
|
43
|
+
* on the stream, so no `track` field filtering is needed.
|
|
44
|
+
* - **No connection acknowledgment**: Plivo does not require a `connected`
|
|
45
|
+
* handshake message after the WebSocket opens.
|
|
46
|
+
* - **Uses `call_uuid`**: Plivo's call identifier field is `call_uuid`
|
|
47
|
+
* (vs. Twilio's `callSid` and Telnyx's `call_control_id`).
|
|
48
|
+
*
|
|
49
|
+
* @see {@link https://www.plivo.com/docs/voice/xml/stream}
|
|
50
|
+
* @module @framers/agentos/voice/parsers/PlivoMediaStreamParser
|
|
51
|
+
*/
|
|
1
52
|
/**
|
|
2
53
|
* Parses the Plivo Audio Stream WebSocket protocol.
|
|
3
54
|
*
|
|
4
55
|
* Plivo sends JSON-encoded messages for stream lifecycle events (`start`,
|
|
5
|
-
* `stop`) and audio chunks (`media`).
|
|
56
|
+
* `stop`) and audio chunks (`media`). The audio payload is base64-encoded
|
|
6
57
|
* mu-law PCM, delivered in a `payload` field inside the `media` object.
|
|
7
58
|
*
|
|
8
59
|
* Outgoing audio is wrapped in a `playAudio` JSON envelope, which is the
|
|
@@ -16,10 +67,15 @@ export class PlivoMediaStreamParser {
|
|
|
16
67
|
* Parse a raw WebSocket frame from Plivo's audio stream.
|
|
17
68
|
*
|
|
18
69
|
* Supported Plivo event types:
|
|
19
|
-
* - `start`
|
|
70
|
+
* - `start` -- stream established; `stream_id` maps to `streamSid`,
|
|
20
71
|
* `call_uuid` maps to `callSid`.
|
|
21
|
-
* - `media`
|
|
22
|
-
*
|
|
72
|
+
* - `media` -- audio chunk; `media.payload` contains base64-encoded mu-law
|
|
73
|
+
* PCM bytes.
|
|
74
|
+
* - `stop` -- stream ended (call terminated or stream explicitly closed).
|
|
75
|
+
*
|
|
76
|
+
* Any other event type is silently dropped by returning `null`. Malformed
|
|
77
|
+
* JSON or messages missing required fields (`event`, `stream_id`) also
|
|
78
|
+
* return `null`.
|
|
23
79
|
*
|
|
24
80
|
* @param data - Raw WebSocket frame payload (JSON string or Buffer from Plivo).
|
|
25
81
|
* @returns Normalised {@link MediaStreamIncoming} event, or `null` for
|
|
@@ -35,12 +91,15 @@ export class PlivoMediaStreamParser {
|
|
|
35
91
|
return null;
|
|
36
92
|
}
|
|
37
93
|
const event = msg['event'];
|
|
94
|
+
// Plivo uses `stream_id` as the stream identifier (same naming as Telnyx).
|
|
38
95
|
const streamSid = msg['stream_id'];
|
|
39
96
|
if (!event || !streamSid) {
|
|
40
97
|
return null;
|
|
41
98
|
}
|
|
42
99
|
switch (event) {
|
|
43
100
|
case 'start': {
|
|
101
|
+
// Plivo uses `call_uuid` as its unique call identifier,
|
|
102
|
+
// different from Twilio's `callSid` and Telnyx's `call_control_id`.
|
|
44
103
|
const callSid = msg['call_uuid'] ?? '';
|
|
45
104
|
const result = {
|
|
46
105
|
type: 'start',
|
|
@@ -75,12 +134,14 @@ export class PlivoMediaStreamParser {
|
|
|
75
134
|
* Encode mu-law audio for transmission back to Plivo.
|
|
76
135
|
*
|
|
77
136
|
* Plivo requires audio to be base64-encoded and wrapped in a `playAudio`
|
|
78
|
-
* JSON envelope.
|
|
137
|
+
* JSON envelope. Unlike Twilio, the `streamSid` / `stream_id` is NOT
|
|
138
|
+
* included in the outbound message -- Plivo implicitly routes the audio
|
|
139
|
+
* to the caller on the same WebSocket connection.
|
|
79
140
|
*
|
|
80
141
|
* @param audio - Raw mu-law PCM bytes to send to the caller.
|
|
81
142
|
* @param _streamSid - Unused by Plivo's `playAudio` format (accepted for
|
|
82
143
|
* interface parity with other parsers).
|
|
83
|
-
* @returns JSON string
|
|
144
|
+
* @returns JSON string: `{ event: 'playAudio', media: { payload: '<base64>' } }`
|
|
84
145
|
*/
|
|
85
146
|
formatOutgoing(audio, _streamSid) {
|
|
86
147
|
return JSON.stringify({
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"PlivoMediaStreamParser.js","sourceRoot":"","sources":["../../../src/voice/parsers/PlivoMediaStreamParser.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"PlivoMediaStreamParser.js","sourceRoot":"","sources":["../../../src/voice/parsers/PlivoMediaStreamParser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkDG;AAIH;;;;;;;;;;;;GAYG;AACH,MAAM,OAAO,sBAAsB;IACjC;;;;;;;;;;;;;;;;;OAiBG;IACH,aAAa,CAAC,IAAqB;QACjC,MAAM,GAAG,GAAG,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEpE,IAAI,GAA4B,CAAC;QACjC,IAAI,CAAC;YACH,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAA4B,CAAC;QACnD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAuB,CAAC;QACjD,2EAA2E;QAC3E,MAAM,SAAS,GAAG,GAAG,CAAC,WAAW,CAAuB,CAAC;QAEzD,IAAI,CAAC,KAAK,IAAI,CAAC,SAAS,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,QAAQ,KAAK,EAAE,CAAC;YACd,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,wDAAwD;gBACxD,oEAAoE;gBACpE,MAAM,OAAO,GAAI,GAAG,CAAC,WAAW,CAAwB,IAAI,EAAE,CAAC;gBAC/D,MAAM,MAAM,GAAwB;oBAClC,IAAI,EAAE,OAAO;oBACb,SAAS;oBACT,OAAO;iBACR,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAwC,CAAC;gBAClE,IAAI,CAAC,KAAK;oBAAE,OAAO,IAAI,CAAC;gBAExB,MAAM,UAAU,GAAG,KAAK,CAAC,SAAS,CAAuB,CAAC;gBAC1D,IAAI,CAAC,UAAU;oBAAE,OAAO,IAAI,CAAC;gBAE7B,MAAM,MAAM,GAAwB;oBAClC,IAAI,EAAE,OAAO;oBACb,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,QAAQ,CAAC;oBAC1C,SAAS;iBACV,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,KAAK,MAAM,CAAC,CAAC,CAAC;gBACZ,MAAM,MAAM,GAAwB,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;gBAChE,OAAO,MAAM,CAAC;YAChB,CAAC;YAED;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,cAAc,CAAC,KAAa,EAAE,UAAkB;QAC9C,OAAO,IAAI,CAAC,SAAS,CAAC;YACpB,KAAK,EAAE,WAAW;YAClB,KAAK,EAAE,EAAE,OAAO,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE;SAC7C,CAAC,CAAC;IACL,CAAC;CACF"}
|
|
@@ -1,15 +1,52 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Telnyx media stream WebSocket parser.
|
|
3
|
+
*
|
|
4
|
+
* ## Telnyx's asymmetric protocol
|
|
5
|
+
*
|
|
6
|
+
* Telnyx uses a fundamentally different approach than Twilio for inbound vs.
|
|
7
|
+
* outbound audio on the media stream WebSocket:
|
|
8
|
+
*
|
|
9
|
+
* - **Inbound** (phone -> server): JSON-encoded messages with `event`, `stream_id`,
|
|
10
|
+
* and `media.chunk` (base64 mu-law audio) fields.
|
|
11
|
+
* - **Outbound** (server -> phone): **Raw binary** WebSocket frames containing
|
|
12
|
+
* mu-law PCM bytes directly, with no JSON envelope whatsoever.
|
|
13
|
+
*
|
|
14
|
+
* This asymmetry means {@link formatOutgoing} returns the `Buffer` unchanged,
|
|
15
|
+
* while {@link parseIncoming} parses JSON and base64-decodes the audio payload.
|
|
16
|
+
*
|
|
17
|
+
* ## Field name mapping
|
|
18
|
+
*
|
|
19
|
+
* Telnyx uses snake_case field names that differ from Twilio's conventions.
|
|
20
|
+
* This parser normalises them to the shared {@link MediaStreamIncoming} shape:
|
|
21
|
+
*
|
|
22
|
+
* | Telnyx field | Normalised field |
|
|
23
|
+
* |----------------------|-------------------|
|
|
24
|
+
* | `stream_id` | `streamSid` |
|
|
25
|
+
* | `call_control_id` | `callSid` |
|
|
26
|
+
* | `media.chunk` | `payload` (Buffer)|
|
|
27
|
+
* | `media.track` | (used for filtering, not emitted) |
|
|
28
|
+
*
|
|
29
|
+
* ## DTMF limitation
|
|
30
|
+
*
|
|
31
|
+
* Telnyx does NOT deliver DTMF events over the media stream WebSocket.
|
|
32
|
+
* DTMF key-presses arrive as `call.dtmf.received` HTTP webhook events and
|
|
33
|
+
* must be handled by {@link TelnyxVoiceProvider.parseWebhookEvent} instead.
|
|
34
|
+
*
|
|
35
|
+
* @see {@link https://developers.telnyx.com/docs/voice/media-streaming}
|
|
36
|
+
* @module @framers/agentos/voice/parsers/TelnyxMediaStreamParser
|
|
37
|
+
*/
|
|
1
38
|
import type { MediaStreamParser, MediaStreamIncoming } from '../MediaStreamParser.js';
|
|
2
39
|
/**
|
|
3
40
|
* Parses the Telnyx media stream WebSocket protocol.
|
|
4
41
|
*
|
|
5
42
|
* Telnyx sends JSON-encoded messages for stream lifecycle events (`start`,
|
|
6
|
-
* `stop`) and audio chunks (`media`).
|
|
7
|
-
* DTMF events over the media stream WebSocket
|
|
43
|
+
* `stop`) and audio chunks (`media`). Unlike Twilio, Telnyx does NOT deliver
|
|
44
|
+
* DTMF events over the media stream WebSocket -- those arrive as HTTP webhooks
|
|
8
45
|
* to a separate endpoint and must be handled outside this parser.
|
|
9
46
|
*
|
|
10
47
|
* Outgoing audio is sent as a **raw binary Buffer** (mu-law PCM bytes without
|
|
11
48
|
* any JSON envelope) because Telnyx accepts unframed binary WebSocket frames
|
|
12
|
-
* directly.
|
|
49
|
+
* directly. No explicit connection acknowledgment is needed after the
|
|
13
50
|
* handshake.
|
|
14
51
|
*
|
|
15
52
|
* @see {@link https://developers.telnyx.com/docs/voice/media-streaming}
|
|
@@ -19,11 +56,15 @@ export declare class TelnyxMediaStreamParser implements MediaStreamParser {
|
|
|
19
56
|
* Parse a raw WebSocket frame from Telnyx's media stream.
|
|
20
57
|
*
|
|
21
58
|
* Supported Telnyx event types:
|
|
22
|
-
* - `start`
|
|
59
|
+
* - `start` -- stream established; `stream_id` maps to `streamSid`,
|
|
23
60
|
* `call_control_id` maps to `callSid`.
|
|
24
|
-
* - `media`
|
|
25
|
-
* bytes; only `inbound` track frames are returned
|
|
26
|
-
*
|
|
61
|
+
* - `media` -- audio chunk; `media.chunk` field contains base64-encoded mu-law
|
|
62
|
+
* bytes; only `inbound` track frames are returned (outbound echoes are
|
|
63
|
+
* discarded to prevent feedback loops).
|
|
64
|
+
* - `stop` -- stream ended (call terminated or stream explicitly closed).
|
|
65
|
+
*
|
|
66
|
+
* Any other event type (e.g., future Telnyx additions, DTMF attempts) is
|
|
67
|
+
* silently dropped by returning `null`.
|
|
27
68
|
*
|
|
28
69
|
* @param data - Raw WebSocket frame payload (JSON string or Buffer from Telnyx).
|
|
29
70
|
* @returns Normalised {@link MediaStreamIncoming} event, or `null` for
|
|
@@ -33,7 +74,9 @@ export declare class TelnyxMediaStreamParser implements MediaStreamParser {
|
|
|
33
74
|
/**
|
|
34
75
|
* Encode mu-law audio for transmission back to Telnyx.
|
|
35
76
|
*
|
|
36
|
-
* Telnyx accepts raw binary WebSocket frames
|
|
77
|
+
* Telnyx accepts raw binary WebSocket frames -- no JSON wrapping is needed.
|
|
78
|
+
* This is the key asymmetry in Telnyx's protocol: inbound is JSON, outbound
|
|
79
|
+
* is raw binary.
|
|
37
80
|
*
|
|
38
81
|
* @param audio - Raw mu-law PCM bytes to send to the caller.
|
|
39
82
|
* @param _streamSid - Unused by Telnyx binary framing (accepted for interface
|
|
@@ -44,6 +87,10 @@ export declare class TelnyxMediaStreamParser implements MediaStreamParser {
|
|
|
44
87
|
/**
|
|
45
88
|
* No explicit connection acknowledgment is required by Telnyx.
|
|
46
89
|
*
|
|
90
|
+
* Unlike Twilio, Telnyx does not need a `connected` handshake message
|
|
91
|
+
* before it starts sending media events.
|
|
92
|
+
*
|
|
93
|
+
* @param _streamSid - Unused (accepted for interface parity).
|
|
47
94
|
* @returns Always `null`.
|
|
48
95
|
*/
|
|
49
96
|
formatConnected(_streamSid: string): null;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"TelnyxMediaStreamParser.d.ts","sourceRoot":"","sources":["../../../src/voice/parsers/TelnyxMediaStreamParser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAEtF;;;;;;;;;;;;;;GAcG;AACH,qBAAa,uBAAwB,YAAW,iBAAiB;IAC/D
|
|
1
|
+
{"version":3,"file":"TelnyxMediaStreamParser.d.ts","sourceRoot":"","sources":["../../../src/voice/parsers/TelnyxMediaStreamParser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AAEH,OAAO,KAAK,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAEtF;;;;;;;;;;;;;;GAcG;AACH,qBAAa,uBAAwB,YAAW,iBAAiB;IAC/D;;;;;;;;;;;;;;;;;OAiBG;IACH,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,mBAAmB,GAAG,IAAI;IA6DhE;;;;;;;;;;;OAWG;IACH,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,MAAM;IAIzD;;;;;;;;OAQG;IACH,eAAe,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI;CAG1C"}
|
|
@@ -1,14 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Telnyx media stream WebSocket parser.
|
|
3
|
+
*
|
|
4
|
+
* ## Telnyx's asymmetric protocol
|
|
5
|
+
*
|
|
6
|
+
* Telnyx uses a fundamentally different approach than Twilio for inbound vs.
|
|
7
|
+
* outbound audio on the media stream WebSocket:
|
|
8
|
+
*
|
|
9
|
+
* - **Inbound** (phone -> server): JSON-encoded messages with `event`, `stream_id`,
|
|
10
|
+
* and `media.chunk` (base64 mu-law audio) fields.
|
|
11
|
+
* - **Outbound** (server -> phone): **Raw binary** WebSocket frames containing
|
|
12
|
+
* mu-law PCM bytes directly, with no JSON envelope whatsoever.
|
|
13
|
+
*
|
|
14
|
+
* This asymmetry means {@link formatOutgoing} returns the `Buffer` unchanged,
|
|
15
|
+
* while {@link parseIncoming} parses JSON and base64-decodes the audio payload.
|
|
16
|
+
*
|
|
17
|
+
* ## Field name mapping
|
|
18
|
+
*
|
|
19
|
+
* Telnyx uses snake_case field names that differ from Twilio's conventions.
|
|
20
|
+
* This parser normalises them to the shared {@link MediaStreamIncoming} shape:
|
|
21
|
+
*
|
|
22
|
+
* | Telnyx field | Normalised field |
|
|
23
|
+
* |----------------------|-------------------|
|
|
24
|
+
* | `stream_id` | `streamSid` |
|
|
25
|
+
* | `call_control_id` | `callSid` |
|
|
26
|
+
* | `media.chunk` | `payload` (Buffer)|
|
|
27
|
+
* | `media.track` | (used for filtering, not emitted) |
|
|
28
|
+
*
|
|
29
|
+
* ## DTMF limitation
|
|
30
|
+
*
|
|
31
|
+
* Telnyx does NOT deliver DTMF events over the media stream WebSocket.
|
|
32
|
+
* DTMF key-presses arrive as `call.dtmf.received` HTTP webhook events and
|
|
33
|
+
* must be handled by {@link TelnyxVoiceProvider.parseWebhookEvent} instead.
|
|
34
|
+
*
|
|
35
|
+
* @see {@link https://developers.telnyx.com/docs/voice/media-streaming}
|
|
36
|
+
* @module @framers/agentos/voice/parsers/TelnyxMediaStreamParser
|
|
37
|
+
*/
|
|
1
38
|
/**
|
|
2
39
|
* Parses the Telnyx media stream WebSocket protocol.
|
|
3
40
|
*
|
|
4
41
|
* Telnyx sends JSON-encoded messages for stream lifecycle events (`start`,
|
|
5
|
-
* `stop`) and audio chunks (`media`).
|
|
6
|
-
* DTMF events over the media stream WebSocket
|
|
42
|
+
* `stop`) and audio chunks (`media`). Unlike Twilio, Telnyx does NOT deliver
|
|
43
|
+
* DTMF events over the media stream WebSocket -- those arrive as HTTP webhooks
|
|
7
44
|
* to a separate endpoint and must be handled outside this parser.
|
|
8
45
|
*
|
|
9
46
|
* Outgoing audio is sent as a **raw binary Buffer** (mu-law PCM bytes without
|
|
10
47
|
* any JSON envelope) because Telnyx accepts unframed binary WebSocket frames
|
|
11
|
-
* directly.
|
|
48
|
+
* directly. No explicit connection acknowledgment is needed after the
|
|
12
49
|
* handshake.
|
|
13
50
|
*
|
|
14
51
|
* @see {@link https://developers.telnyx.com/docs/voice/media-streaming}
|
|
@@ -18,11 +55,15 @@ export class TelnyxMediaStreamParser {
|
|
|
18
55
|
* Parse a raw WebSocket frame from Telnyx's media stream.
|
|
19
56
|
*
|
|
20
57
|
* Supported Telnyx event types:
|
|
21
|
-
* - `start`
|
|
58
|
+
* - `start` -- stream established; `stream_id` maps to `streamSid`,
|
|
22
59
|
* `call_control_id` maps to `callSid`.
|
|
23
|
-
* - `media`
|
|
24
|
-
* bytes; only `inbound` track frames are returned
|
|
25
|
-
*
|
|
60
|
+
* - `media` -- audio chunk; `media.chunk` field contains base64-encoded mu-law
|
|
61
|
+
* bytes; only `inbound` track frames are returned (outbound echoes are
|
|
62
|
+
* discarded to prevent feedback loops).
|
|
63
|
+
* - `stop` -- stream ended (call terminated or stream explicitly closed).
|
|
64
|
+
*
|
|
65
|
+
* Any other event type (e.g., future Telnyx additions, DTMF attempts) is
|
|
66
|
+
* silently dropped by returning `null`.
|
|
26
67
|
*
|
|
27
68
|
* @param data - Raw WebSocket frame payload (JSON string or Buffer from Telnyx).
|
|
28
69
|
* @returns Normalised {@link MediaStreamIncoming} event, or `null` for
|
|
@@ -38,12 +79,15 @@ export class TelnyxMediaStreamParser {
|
|
|
38
79
|
return null;
|
|
39
80
|
}
|
|
40
81
|
const event = msg['event'];
|
|
82
|
+
// Telnyx uses `stream_id` where Twilio uses `streamSid`.
|
|
41
83
|
const streamSid = msg['stream_id'];
|
|
42
84
|
if (!event || !streamSid) {
|
|
43
85
|
return null;
|
|
44
86
|
}
|
|
45
87
|
switch (event) {
|
|
46
88
|
case 'start': {
|
|
89
|
+
// Telnyx uses `call_control_id` as the call-leg identifier,
|
|
90
|
+
// equivalent to Twilio's `callSid`.
|
|
47
91
|
const callSid = msg['call_control_id'] ?? '';
|
|
48
92
|
const result = {
|
|
49
93
|
type: 'start',
|
|
@@ -56,10 +100,11 @@ export class TelnyxMediaStreamParser {
|
|
|
56
100
|
const media = msg['media'];
|
|
57
101
|
if (!media)
|
|
58
102
|
return null;
|
|
59
|
-
// Ignore outbound audio echoes from Telnyx.
|
|
103
|
+
// Ignore outbound audio echoes from Telnyx to prevent feedback.
|
|
60
104
|
const track = media['track'];
|
|
61
105
|
if (track === 'outbound')
|
|
62
106
|
return null;
|
|
107
|
+
// Telnyx names its audio payload field `chunk` (not `payload` like Twilio).
|
|
63
108
|
const chunk = media['chunk'];
|
|
64
109
|
if (!chunk)
|
|
65
110
|
return null;
|
|
@@ -81,7 +126,9 @@ export class TelnyxMediaStreamParser {
|
|
|
81
126
|
/**
|
|
82
127
|
* Encode mu-law audio for transmission back to Telnyx.
|
|
83
128
|
*
|
|
84
|
-
* Telnyx accepts raw binary WebSocket frames
|
|
129
|
+
* Telnyx accepts raw binary WebSocket frames -- no JSON wrapping is needed.
|
|
130
|
+
* This is the key asymmetry in Telnyx's protocol: inbound is JSON, outbound
|
|
131
|
+
* is raw binary.
|
|
85
132
|
*
|
|
86
133
|
* @param audio - Raw mu-law PCM bytes to send to the caller.
|
|
87
134
|
* @param _streamSid - Unused by Telnyx binary framing (accepted for interface
|
|
@@ -94,6 +141,10 @@ export class TelnyxMediaStreamParser {
|
|
|
94
141
|
/**
|
|
95
142
|
* No explicit connection acknowledgment is required by Telnyx.
|
|
96
143
|
*
|
|
144
|
+
* Unlike Twilio, Telnyx does not need a `connected` handshake message
|
|
145
|
+
* before it starts sending media events.
|
|
146
|
+
*
|
|
147
|
+
* @param _streamSid - Unused (accepted for interface parity).
|
|
97
148
|
* @returns Always `null`.
|
|
98
149
|
*/
|
|
99
150
|
formatConnected(_streamSid) {
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"TelnyxMediaStreamParser.js","sourceRoot":"","sources":["../../../src/voice/parsers/TelnyxMediaStreamParser.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"TelnyxMediaStreamParser.js","sourceRoot":"","sources":["../../../src/voice/parsers/TelnyxMediaStreamParser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AAIH;;;;;;;;;;;;;;GAcG;AACH,MAAM,OAAO,uBAAuB;IAClC;;;;;;;;;;;;;;;;;OAiBG;IACH,aAAa,CAAC,IAAqB;QACjC,MAAM,GAAG,GAAG,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEpE,IAAI,GAA4B,CAAC;QACjC,IAAI,CAAC;YACH,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAA4B,CAAC;QACnD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAuB,CAAC;QACjD,yDAAyD;QACzD,MAAM,SAAS,GAAG,GAAG,CAAC,WAAW,CAAuB,CAAC;QAEzD,IAAI,CAAC,KAAK,IAAI,CAAC,SAAS,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,QAAQ,KAAK,EAAE,CAAC;YACd,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,4DAA4D;gBAC5D,oCAAoC;gBACpC,MAAM,OAAO,GAAI,GAAG,CAAC,iBAAiB,CAAwB,IAAI,EAAE,CAAC;gBACrE,MAAM,MAAM,GAAwB;oBAClC,IAAI,EAAE,OAAO;oBACb,SAAS;oBACT,OAAO;iBACR,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAwC,CAAC;gBAClE,IAAI,CAAC,KAAK;oBAAE,OAAO,IAAI,CAAC;gBAExB,gEAAgE;gBAChE,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAuB,CAAC;gBACnD,IAAI,KAAK,KAAK,UAAU;oBAAE,OAAO,IAAI,CAAC;gBAEtC,4EAA4E;gBAC5E,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAuB,CAAC;gBACnD,IAAI,CAAC,KAAK;oBAAE,OAAO,IAAI,CAAC;gBAExB,MAAM,MAAM,GAAwB;oBAClC,IAAI,EAAE,OAAO;oBACb,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC;oBACrC,SAAS;iBACV,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,KAAK,MAAM,CAAC,CAAC,CAAC;gBACZ,MAAM,MAAM,GAAwB,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;gBAChE,OAAO,MAAM,CAAC;YAChB,CAAC;YAED;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC;IAED;;;;;;;;;;;OAWG;IACH,cAAc,CAAC,KAAa,EAAE,UAAkB;QAC9C,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;;;;;;;OAQG;IACH,eAAe,CAAC,UAAkB;QAChC,OAAO,IAAI,CAAC;IACd,CAAC;CACF"}
|
|
@@ -1,10 +1,64 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Twilio `<Connect><Stream>` WebSocket media stream parser.
|
|
3
|
+
*
|
|
4
|
+
* ## Twilio media stream protocol
|
|
5
|
+
*
|
|
6
|
+
* When a Twilio call executes the TwiML `<Connect><Stream url="wss://..." />`,
|
|
7
|
+
* Twilio opens a WebSocket to the specified URL and sends **all messages as
|
|
8
|
+
* JSON-encoded strings** (never raw binary). Each message has an `event` field
|
|
9
|
+
* and a `streamSid` field that together identify the event type and stream.
|
|
10
|
+
*
|
|
11
|
+
* ### Inbound JSON message shapes
|
|
12
|
+
*
|
|
13
|
+
* ```
|
|
14
|
+
* ┌─────────────────────────────────────────────────────────────────────┐
|
|
15
|
+
* │ event: "start" │
|
|
16
|
+
* │ streamSid: "MZxxx" │
|
|
17
|
+
* │ start: { callSid, accountSid, mediaFormat: { encoding, ... } } │
|
|
18
|
+
* ├─────────────────────────────────────────────────────────────────────┤
|
|
19
|
+
* │ event: "media" │
|
|
20
|
+
* │ streamSid: "MZxxx" │
|
|
21
|
+
* │ media: { track: "inbound"|"outbound", payload: "<base64>" } │
|
|
22
|
+
* │ sequenceNumber: 42 │
|
|
23
|
+
* ├─────────────────────────────────────────────────────────────────────┤
|
|
24
|
+
* │ event: "dtmf" │
|
|
25
|
+
* │ streamSid: "MZxxx" │
|
|
26
|
+
* │ dtmf: { digit: "5", duration: 500 } │
|
|
27
|
+
* ├─────────────────────────────────────────────────────────────────────┤
|
|
28
|
+
* │ event: "mark" │
|
|
29
|
+
* │ streamSid: "MZxxx" │
|
|
30
|
+
* │ mark: { name: "utterance-done" } │
|
|
31
|
+
* ├─────────────────────────────────────────────────────────────────────┤
|
|
32
|
+
* │ event: "stop" │
|
|
33
|
+
* │ streamSid: "MZxxx" │
|
|
34
|
+
* └─────────────────────────────────────────────────────────────────────┘
|
|
35
|
+
* ```
|
|
36
|
+
*
|
|
37
|
+
* ### Outbound audio format
|
|
38
|
+
*
|
|
39
|
+
* Audio sent back to Twilio must be wrapped in a JSON `media` envelope:
|
|
40
|
+
* ```json
|
|
41
|
+
* { "event": "media", "streamSid": "MZxxx", "media": { "payload": "<base64>" } }
|
|
42
|
+
* ```
|
|
43
|
+
*
|
|
44
|
+
* ### Connection acknowledgment
|
|
45
|
+
*
|
|
46
|
+
* Immediately after the WebSocket handshake, the server must send:
|
|
47
|
+
* ```json
|
|
48
|
+
* { "event": "connected", "protocol": "Call", "version": "1.0.0" }
|
|
49
|
+
* ```
|
|
50
|
+
* This tells Twilio the listener is ready to receive media.
|
|
51
|
+
*
|
|
52
|
+
* @see {@link https://www.twilio.com/docs/voice/twiml/stream}
|
|
53
|
+
* @module @framers/agentos/voice/parsers/TwilioMediaStreamParser
|
|
54
|
+
*/
|
|
1
55
|
import type { MediaStreamParser, MediaStreamIncoming } from '../MediaStreamParser.js';
|
|
2
56
|
/**
|
|
3
57
|
* Parses the Twilio `<Connect><Stream>` WebSocket media stream protocol.
|
|
4
58
|
*
|
|
5
|
-
* Twilio sends all messages as JSON-encoded strings.
|
|
59
|
+
* Twilio sends all messages as JSON-encoded strings. Outbound audio is
|
|
6
60
|
* wrapped in the same JSON envelope so Twilio can associate it with the
|
|
7
|
-
* correct stream.
|
|
61
|
+
* correct stream. An explicit `connected` acknowledgment is sent once
|
|
8
62
|
* immediately after the WebSocket handshake to signal that the listener is
|
|
9
63
|
* ready to receive media.
|
|
10
64
|
*
|
|
@@ -15,11 +69,15 @@ export declare class TwilioMediaStreamParser implements MediaStreamParser {
|
|
|
15
69
|
* Parse a raw WebSocket frame from Twilio's media stream.
|
|
16
70
|
*
|
|
17
71
|
* Supported Twilio event types:
|
|
18
|
-
* - `start`
|
|
19
|
-
* - `media`
|
|
20
|
-
*
|
|
21
|
-
* - `
|
|
22
|
-
* - `
|
|
72
|
+
* - `start` -- stream established, includes callSid and media format metadata.
|
|
73
|
+
* - `media` -- audio chunk (inbound track only; outbound echoes are discarded
|
|
74
|
+
* to prevent feedback loops).
|
|
75
|
+
* - `dtmf` -- DTMF keypress detected on the audio stream.
|
|
76
|
+
* - `stop` -- stream ended (call hangup or stream disconnect).
|
|
77
|
+
* - `mark` -- named synchronisation marker confirming playback reached a point.
|
|
78
|
+
*
|
|
79
|
+
* Messages with missing `event` or `streamSid` fields, malformed JSON,
|
|
80
|
+
* or unrecognised event types are silently dropped (return `null`).
|
|
23
81
|
*
|
|
24
82
|
* @param data - Raw WebSocket frame payload (always a JSON string from Twilio).
|
|
25
83
|
* @returns Normalised {@link MediaStreamIncoming} event, or `null` for
|
|
@@ -30,20 +88,24 @@ export declare class TwilioMediaStreamParser implements MediaStreamParser {
|
|
|
30
88
|
* Encode mu-law audio for transmission back to the Twilio stream.
|
|
31
89
|
*
|
|
32
90
|
* Twilio requires base64-encoded audio wrapped in a JSON `media` envelope
|
|
33
|
-
* so it can route the audio to the correct stream
|
|
91
|
+
* so it can route the audio to the correct stream by `streamSid`.
|
|
34
92
|
*
|
|
35
93
|
* @param audio - Raw mu-law PCM bytes to send to the caller.
|
|
36
94
|
* @param streamSid - The stream identifier to include in the envelope.
|
|
37
|
-
* @returns JSON string conforming to the Twilio media-out envelope format
|
|
95
|
+
* @returns JSON string conforming to the Twilio media-out envelope format:
|
|
96
|
+
* `{ event: 'media', streamSid: '...', media: { payload: '<base64>' } }`
|
|
38
97
|
*/
|
|
39
98
|
formatOutgoing(audio: Buffer, streamSid: string): string;
|
|
40
99
|
/**
|
|
41
100
|
* Generate the initial `connected` acknowledgment expected by Twilio
|
|
42
101
|
* immediately after the WebSocket connection is established.
|
|
43
102
|
*
|
|
44
|
-
*
|
|
103
|
+
* Without this message, Twilio keeps waiting for a response and
|
|
104
|
+
* eventually times out the stream connection.
|
|
105
|
+
*
|
|
106
|
+
* @param _streamSid - Unused -- Twilio does not require the stream ID in the
|
|
45
107
|
* `connected` message, but the parameter is accepted for interface parity.
|
|
46
|
-
* @returns JSON string
|
|
108
|
+
* @returns JSON string: `{ event: 'connected', protocol: 'Call', version: '1.0.0' }`
|
|
47
109
|
*/
|
|
48
110
|
formatConnected(_streamSid: string): string;
|
|
49
111
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"TwilioMediaStreamParser.d.ts","sourceRoot":"","sources":["../../../src/voice/parsers/TwilioMediaStreamParser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAEtF;;;;;;;;;;GAUG;AACH,qBAAa,uBAAwB,YAAW,iBAAiB;IAC/D
|
|
1
|
+
{"version":3,"file":"TwilioMediaStreamParser.d.ts","sourceRoot":"","sources":["../../../src/voice/parsers/TwilioMediaStreamParser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqDG;AAEH,OAAO,KAAK,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAEtF;;;;;;;;;;GAUG;AACH,qBAAa,uBAAwB,YAAW,iBAAiB;IAC/D;;;;;;;;;;;;;;;;;OAiBG;IACH,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,mBAAmB,GAAG,IAAI;IAsGhE;;;;;;;;;;OAUG;IACH,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM;IAQxD;;;;;;;;;;OAUG;IACH,eAAe,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM;CAO5C"}
|
|
@@ -1,9 +1,63 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* @fileoverview Twilio `<Connect><Stream>` WebSocket media stream parser.
|
|
3
|
+
*
|
|
4
|
+
* ## Twilio media stream protocol
|
|
5
|
+
*
|
|
6
|
+
* When a Twilio call executes the TwiML `<Connect><Stream url="wss://..." />`,
|
|
7
|
+
* Twilio opens a WebSocket to the specified URL and sends **all messages as
|
|
8
|
+
* JSON-encoded strings** (never raw binary). Each message has an `event` field
|
|
9
|
+
* and a `streamSid` field that together identify the event type and stream.
|
|
10
|
+
*
|
|
11
|
+
* ### Inbound JSON message shapes
|
|
12
|
+
*
|
|
13
|
+
* ```
|
|
14
|
+
* ┌─────────────────────────────────────────────────────────────────────┐
|
|
15
|
+
* │ event: "start" │
|
|
16
|
+
* │ streamSid: "MZxxx" │
|
|
17
|
+
* │ start: { callSid, accountSid, mediaFormat: { encoding, ... } } │
|
|
18
|
+
* ├─────────────────────────────────────────────────────────────────────┤
|
|
19
|
+
* │ event: "media" │
|
|
20
|
+
* │ streamSid: "MZxxx" │
|
|
21
|
+
* │ media: { track: "inbound"|"outbound", payload: "<base64>" } │
|
|
22
|
+
* │ sequenceNumber: 42 │
|
|
23
|
+
* ├─────────────────────────────────────────────────────────────────────┤
|
|
24
|
+
* │ event: "dtmf" │
|
|
25
|
+
* │ streamSid: "MZxxx" │
|
|
26
|
+
* │ dtmf: { digit: "5", duration: 500 } │
|
|
27
|
+
* ├─────────────────────────────────────────────────────────────────────┤
|
|
28
|
+
* │ event: "mark" │
|
|
29
|
+
* │ streamSid: "MZxxx" │
|
|
30
|
+
* │ mark: { name: "utterance-done" } │
|
|
31
|
+
* ├─────────────────────────────────────────────────────────────────────┤
|
|
32
|
+
* │ event: "stop" │
|
|
33
|
+
* │ streamSid: "MZxxx" │
|
|
34
|
+
* └─────────────────────────────────────────────────────────────────────┘
|
|
35
|
+
* ```
|
|
36
|
+
*
|
|
37
|
+
* ### Outbound audio format
|
|
38
|
+
*
|
|
39
|
+
* Audio sent back to Twilio must be wrapped in a JSON `media` envelope:
|
|
40
|
+
* ```json
|
|
41
|
+
* { "event": "media", "streamSid": "MZxxx", "media": { "payload": "<base64>" } }
|
|
42
|
+
* ```
|
|
43
|
+
*
|
|
44
|
+
* ### Connection acknowledgment
|
|
45
|
+
*
|
|
46
|
+
* Immediately after the WebSocket handshake, the server must send:
|
|
47
|
+
* ```json
|
|
48
|
+
* { "event": "connected", "protocol": "Call", "version": "1.0.0" }
|
|
49
|
+
* ```
|
|
50
|
+
* This tells Twilio the listener is ready to receive media.
|
|
51
|
+
*
|
|
52
|
+
* @see {@link https://www.twilio.com/docs/voice/twiml/stream}
|
|
53
|
+
* @module @framers/agentos/voice/parsers/TwilioMediaStreamParser
|
|
54
|
+
*/
|
|
1
55
|
/**
|
|
2
56
|
* Parses the Twilio `<Connect><Stream>` WebSocket media stream protocol.
|
|
3
57
|
*
|
|
4
|
-
* Twilio sends all messages as JSON-encoded strings.
|
|
58
|
+
* Twilio sends all messages as JSON-encoded strings. Outbound audio is
|
|
5
59
|
* wrapped in the same JSON envelope so Twilio can associate it with the
|
|
6
|
-
* correct stream.
|
|
60
|
+
* correct stream. An explicit `connected` acknowledgment is sent once
|
|
7
61
|
* immediately after the WebSocket handshake to signal that the listener is
|
|
8
62
|
* ready to receive media.
|
|
9
63
|
*
|
|
@@ -14,11 +68,15 @@ export class TwilioMediaStreamParser {
|
|
|
14
68
|
* Parse a raw WebSocket frame from Twilio's media stream.
|
|
15
69
|
*
|
|
16
70
|
* Supported Twilio event types:
|
|
17
|
-
* - `start`
|
|
18
|
-
* - `media`
|
|
19
|
-
*
|
|
20
|
-
* - `
|
|
21
|
-
* - `
|
|
71
|
+
* - `start` -- stream established, includes callSid and media format metadata.
|
|
72
|
+
* - `media` -- audio chunk (inbound track only; outbound echoes are discarded
|
|
73
|
+
* to prevent feedback loops).
|
|
74
|
+
* - `dtmf` -- DTMF keypress detected on the audio stream.
|
|
75
|
+
* - `stop` -- stream ended (call hangup or stream disconnect).
|
|
76
|
+
* - `mark` -- named synchronisation marker confirming playback reached a point.
|
|
77
|
+
*
|
|
78
|
+
* Messages with missing `event` or `streamSid` fields, malformed JSON,
|
|
79
|
+
* or unrecognised event types are silently dropped (return `null`).
|
|
22
80
|
*
|
|
23
81
|
* @param data - Raw WebSocket frame payload (always a JSON string from Twilio).
|
|
24
82
|
* @returns Normalised {@link MediaStreamIncoming} event, or `null` for
|
|
@@ -35,12 +93,14 @@ export class TwilioMediaStreamParser {
|
|
|
35
93
|
}
|
|
36
94
|
const event = msg['event'];
|
|
37
95
|
const streamSid = msg['streamSid'];
|
|
96
|
+
// Both fields are required on every Twilio media stream message.
|
|
38
97
|
if (!event || !streamSid) {
|
|
39
98
|
return null;
|
|
40
99
|
}
|
|
41
100
|
switch (event) {
|
|
42
101
|
case 'start': {
|
|
43
102
|
const startPayload = msg['start'];
|
|
103
|
+
// callSid identifies the Twilio call leg this stream belongs to.
|
|
44
104
|
const callSid = startPayload?.['callSid'] ?? '';
|
|
45
105
|
const result = {
|
|
46
106
|
type: 'start',
|
|
@@ -54,7 +114,9 @@ export class TwilioMediaStreamParser {
|
|
|
54
114
|
const media = msg['media'];
|
|
55
115
|
if (!media)
|
|
56
116
|
return null;
|
|
57
|
-
//
|
|
117
|
+
// Twilio sends both inbound and outbound audio on the same stream.
|
|
118
|
+
// Outbound echoes must be discarded to prevent feedback loops where
|
|
119
|
+
// the agent hears its own TTS output.
|
|
58
120
|
const track = media['track'];
|
|
59
121
|
if (track === 'outbound')
|
|
60
122
|
return null;
|
|
@@ -79,6 +141,7 @@ export class TwilioMediaStreamParser {
|
|
|
79
141
|
const digit = dtmf['digit'];
|
|
80
142
|
if (!digit)
|
|
81
143
|
return null;
|
|
144
|
+
// Twilio reports DTMF key-hold duration in milliseconds.
|
|
82
145
|
const duration = typeof dtmf['duration'] === 'number'
|
|
83
146
|
? dtmf['duration']
|
|
84
147
|
: undefined;
|
|
@@ -105,6 +168,8 @@ export class TwilioMediaStreamParser {
|
|
|
105
168
|
return result;
|
|
106
169
|
}
|
|
107
170
|
default:
|
|
171
|
+
// Twilio may add new event types in the future; silently ignore them
|
|
172
|
+
// rather than throwing so existing deployments remain forward-compatible.
|
|
108
173
|
return null;
|
|
109
174
|
}
|
|
110
175
|
}
|
|
@@ -112,11 +177,12 @@ export class TwilioMediaStreamParser {
|
|
|
112
177
|
* Encode mu-law audio for transmission back to the Twilio stream.
|
|
113
178
|
*
|
|
114
179
|
* Twilio requires base64-encoded audio wrapped in a JSON `media` envelope
|
|
115
|
-
* so it can route the audio to the correct stream
|
|
180
|
+
* so it can route the audio to the correct stream by `streamSid`.
|
|
116
181
|
*
|
|
117
182
|
* @param audio - Raw mu-law PCM bytes to send to the caller.
|
|
118
183
|
* @param streamSid - The stream identifier to include in the envelope.
|
|
119
|
-
* @returns JSON string conforming to the Twilio media-out envelope format
|
|
184
|
+
* @returns JSON string conforming to the Twilio media-out envelope format:
|
|
185
|
+
* `{ event: 'media', streamSid: '...', media: { payload: '<base64>' } }`
|
|
120
186
|
*/
|
|
121
187
|
formatOutgoing(audio, streamSid) {
|
|
122
188
|
return JSON.stringify({
|
|
@@ -129,9 +195,12 @@ export class TwilioMediaStreamParser {
|
|
|
129
195
|
* Generate the initial `connected` acknowledgment expected by Twilio
|
|
130
196
|
* immediately after the WebSocket connection is established.
|
|
131
197
|
*
|
|
132
|
-
*
|
|
198
|
+
* Without this message, Twilio keeps waiting for a response and
|
|
199
|
+
* eventually times out the stream connection.
|
|
200
|
+
*
|
|
201
|
+
* @param _streamSid - Unused -- Twilio does not require the stream ID in the
|
|
133
202
|
* `connected` message, but the parameter is accepted for interface parity.
|
|
134
|
-
* @returns JSON string
|
|
203
|
+
* @returns JSON string: `{ event: 'connected', protocol: 'Call', version: '1.0.0' }`
|
|
135
204
|
*/
|
|
136
205
|
formatConnected(_streamSid) {
|
|
137
206
|
return JSON.stringify({
|