@framers/agentos 0.1.111 → 0.1.113

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119)
  1. package/dist/api/strategies/debate.d.ts +12 -1
  2. package/dist/api/strategies/debate.d.ts.map +1 -1
  3. package/dist/api/strategies/debate.js +41 -5
  4. package/dist/api/strategies/debate.js.map +1 -1
  5. package/dist/api/strategies/hierarchical.d.ts +15 -1
  6. package/dist/api/strategies/hierarchical.d.ts.map +1 -1
  7. package/dist/api/strategies/hierarchical.js +51 -7
  8. package/dist/api/strategies/hierarchical.js.map +1 -1
  9. package/dist/api/strategies/index.d.ts +26 -4
  10. package/dist/api/strategies/index.d.ts.map +1 -1
  11. package/dist/api/strategies/index.js +26 -4
  12. package/dist/api/strategies/index.js.map +1 -1
  13. package/dist/api/strategies/parallel.d.ts +15 -4
  14. package/dist/api/strategies/parallel.d.ts.map +1 -1
  15. package/dist/api/strategies/parallel.js +53 -16
  16. package/dist/api/strategies/parallel.js.map +1 -1
  17. package/dist/api/strategies/review-loop.d.ts +15 -1
  18. package/dist/api/strategies/review-loop.d.ts.map +1 -1
  19. package/dist/api/strategies/review-loop.js +36 -10
  20. package/dist/api/strategies/review-loop.js.map +1 -1
  21. package/dist/api/strategies/sequential.d.ts +11 -1
  22. package/dist/api/strategies/sequential.d.ts.map +1 -1
  23. package/dist/api/strategies/sequential.js +39 -8
  24. package/dist/api/strategies/sequential.js.map +1 -1
  25. package/dist/api/strategies/shared.d.ts +71 -7
  26. package/dist/api/strategies/shared.d.ts.map +1 -1
  27. package/dist/api/strategies/shared.js +89 -10
  28. package/dist/api/strategies/shared.js.map +1 -1
  29. package/dist/api/types.d.ts +54 -1
  30. package/dist/api/types.d.ts.map +1 -1
  31. package/dist/api/types.js.map +1 -1
  32. package/dist/memory/facade/Memory.d.ts.map +1 -1
  33. package/dist/memory/facade/Memory.js +8 -0
  34. package/dist/memory/facade/Memory.js.map +1 -1
  35. package/dist/memory/facade/types.d.ts +10 -0
  36. package/dist/memory/facade/types.d.ts.map +1 -1
  37. package/dist/memory/index.d.ts +6 -0
  38. package/dist/memory/index.d.ts.map +1 -1
  39. package/dist/memory/index.js +5 -0
  40. package/dist/memory/index.js.map +1 -1
  41. package/dist/memory/observation/MemoryObserver.d.ts +63 -1
  42. package/dist/memory/observation/MemoryObserver.d.ts.map +1 -1
  43. package/dist/memory/observation/MemoryObserver.js +115 -4
  44. package/dist/memory/observation/MemoryObserver.js.map +1 -1
  45. package/dist/memory/observation/ObservationCompressor.d.ts +88 -0
  46. package/dist/memory/observation/ObservationCompressor.d.ts.map +1 -0
  47. package/dist/memory/observation/ObservationCompressor.js +207 -0
  48. package/dist/memory/observation/ObservationCompressor.js.map +1 -0
  49. package/dist/memory/observation/ObservationReflector.d.ts +82 -0
  50. package/dist/memory/observation/ObservationReflector.d.ts.map +1 -0
  51. package/dist/memory/observation/ObservationReflector.js +212 -0
  52. package/dist/memory/observation/ObservationReflector.js.map +1 -0
  53. package/dist/memory/observation/temporal.d.ts +54 -0
  54. package/dist/memory/observation/temporal.d.ts.map +1 -0
  55. package/dist/memory/observation/temporal.js +115 -0
  56. package/dist/memory/observation/temporal.js.map +1 -0
  57. package/dist/orchestration/builders/VoiceNodeBuilder.d.ts +82 -25
  58. package/dist/orchestration/builders/VoiceNodeBuilder.d.ts.map +1 -1
  59. package/dist/orchestration/builders/VoiceNodeBuilder.js +86 -26
  60. package/dist/orchestration/builders/VoiceNodeBuilder.js.map +1 -1
  61. package/dist/orchestration/events/GraphEvent.d.ts +67 -5
  62. package/dist/orchestration/events/GraphEvent.d.ts.map +1 -1
  63. package/dist/orchestration/events/GraphEvent.js.map +1 -1
  64. package/dist/orchestration/runtime/VoiceNodeExecutor.d.ts +102 -25
  65. package/dist/orchestration/runtime/VoiceNodeExecutor.d.ts.map +1 -1
  66. package/dist/orchestration/runtime/VoiceNodeExecutor.js +133 -38
  67. package/dist/orchestration/runtime/VoiceNodeExecutor.js.map +1 -1
  68. package/dist/orchestration/runtime/VoiceTransportAdapter.d.ts +94 -32
  69. package/dist/orchestration/runtime/VoiceTransportAdapter.d.ts.map +1 -1
  70. package/dist/orchestration/runtime/VoiceTransportAdapter.js +82 -28
  71. package/dist/orchestration/runtime/VoiceTransportAdapter.js.map +1 -1
  72. package/dist/orchestration/runtime/VoiceTurnCollector.d.ts +73 -20
  73. package/dist/orchestration/runtime/VoiceTurnCollector.d.ts.map +1 -1
  74. package/dist/orchestration/runtime/VoiceTurnCollector.js +84 -23
  75. package/dist/orchestration/runtime/VoiceTurnCollector.js.map +1 -1
  76. package/dist/voice/CallManager.d.ts.map +1 -1
  77. package/dist/voice/CallManager.js +9 -1
  78. package/dist/voice/CallManager.js.map +1 -1
  79. package/dist/voice/MediaStreamParser.d.ts +115 -6
  80. package/dist/voice/MediaStreamParser.d.ts.map +1 -1
  81. package/dist/voice/MediaStreamParser.js +44 -0
  82. package/dist/voice/MediaStreamParser.js.map +1 -1
  83. package/dist/voice/TelephonyStreamTransport.d.ts +112 -20
  84. package/dist/voice/TelephonyStreamTransport.d.ts.map +1 -1
  85. package/dist/voice/TelephonyStreamTransport.js +136 -30
  86. package/dist/voice/TelephonyStreamTransport.js.map +1 -1
  87. package/dist/voice/parsers/PlivoMediaStreamParser.d.ts +64 -6
  88. package/dist/voice/parsers/PlivoMediaStreamParser.d.ts.map +1 -1
  89. package/dist/voice/parsers/PlivoMediaStreamParser.js +67 -6
  90. package/dist/voice/parsers/PlivoMediaStreamParser.js.map +1 -1
  91. package/dist/voice/parsers/TelnyxMediaStreamParser.d.ts +55 -8
  92. package/dist/voice/parsers/TelnyxMediaStreamParser.d.ts.map +1 -1
  93. package/dist/voice/parsers/TelnyxMediaStreamParser.js +60 -9
  94. package/dist/voice/parsers/TelnyxMediaStreamParser.js.map +1 -1
  95. package/dist/voice/parsers/TwilioMediaStreamParser.d.ts +73 -11
  96. package/dist/voice/parsers/TwilioMediaStreamParser.d.ts.map +1 -1
  97. package/dist/voice/parsers/TwilioMediaStreamParser.js +81 -12
  98. package/dist/voice/parsers/TwilioMediaStreamParser.js.map +1 -1
  99. package/dist/voice/providers/plivo.d.ts +108 -12
  100. package/dist/voice/providers/plivo.d.ts.map +1 -1
  101. package/dist/voice/providers/plivo.js +106 -9
  102. package/dist/voice/providers/plivo.js.map +1 -1
  103. package/dist/voice/providers/telnyx.d.ts +110 -20
  104. package/dist/voice/providers/telnyx.d.ts.map +1 -1
  105. package/dist/voice/providers/telnyx.js +111 -20
  106. package/dist/voice/providers/telnyx.js.map +1 -1
  107. package/dist/voice/providers/twilio.d.ts +91 -13
  108. package/dist/voice/providers/twilio.d.ts.map +1 -1
  109. package/dist/voice/providers/twilio.js +94 -14
  110. package/dist/voice/providers/twilio.js.map +1 -1
  111. package/dist/voice/twiml.d.ts +70 -12
  112. package/dist/voice/twiml.d.ts.map +1 -1
  113. package/dist/voice/twiml.js +70 -12
  114. package/dist/voice/twiml.js.map +1 -1
  115. package/dist/voice/types.d.ts +142 -15
  116. package/dist/voice/types.d.ts.map +1 -1
  117. package/dist/voice/types.js +34 -3
  118. package/dist/voice/types.js.map +1 -1
  119. package/package.json +1 -1
@@ -1,8 +1,59 @@
1
+ /**
2
+ * @fileoverview Plivo Audio Stream WebSocket parser.
3
+ *
4
+ * ## Plivo Audio Stream protocol
5
+ *
6
+ * Plivo's bidirectional Audio Stream (triggered by the `<Stream>` XML element)
7
+ * sends JSON-encoded messages over WebSocket for stream lifecycle and audio data.
8
+ *
9
+ * ### Inbound message shapes
10
+ *
11
+ * ```
12
+ * ┌─────────────────────────────────────────────────────────────────────┐
13
+ * │ event: "start" │
14
+ * │ stream_id: "s-xxx" │
15
+ * │ call_uuid: "u-xxx" │
16
+ * ├─────────────────────────────────────────────────────────────────────┤
17
+ * │ event: "media" │
18
+ * │ stream_id: "s-xxx" │
19
+ * │ media: { payload: "<base64 mu-law audio>" } │
20
+ * ├─────────────────────────────────────────────────────────────────────┤
21
+ * │ event: "stop" │
22
+ * │ stream_id: "s-xxx" │
23
+ * └─────────────────────────────────────────────────────────────────────┘
24
+ * ```
25
+ *
26
+ * ### Outbound `playAudio` format
27
+ *
28
+ * To send audio back to the caller, the server sends a JSON `playAudio` event:
29
+ * ```json
30
+ * { "event": "playAudio", "media": { "payload": "<base64 mu-law audio>" } }
31
+ * ```
32
+ *
33
+ * Note: unlike Twilio, Plivo's outbound format does NOT include a `streamSid`
34
+ * or `stream_id` field -- the audio is implicitly routed to the caller on the
35
+ * same WebSocket connection.
36
+ *
37
+ * ### Differences from Twilio and Telnyx
38
+ *
39
+ * - **No DTMF over media stream**: Plivo delivers DTMF via `<GetDigits>`
40
+ * XML callback webhooks (as a `Digits` POST parameter), not over the
41
+ * WebSocket stream.
42
+ * - **No outbound track filtering**: Plivo does not echo outbound audio back
43
+ * on the stream, so no `track` field filtering is needed.
44
+ * - **No connection acknowledgment**: Plivo does not require a `connected`
45
+ * handshake message after the WebSocket opens.
46
+ * - **Uses `call_uuid`**: Plivo's call identifier field is `call_uuid`
47
+ * (vs. Twilio's `callSid` and Telnyx's `call_control_id`).
48
+ *
49
+ * @see {@link https://www.plivo.com/docs/voice/xml/stream}
50
+ * @module @framers/agentos/voice/parsers/PlivoMediaStreamParser
51
+ */
1
52
  /**
2
53
  * Parses the Plivo Audio Stream WebSocket protocol.
3
54
  *
4
55
  * Plivo sends JSON-encoded messages for stream lifecycle events (`start`,
5
- * `stop`) and audio chunks (`media`). The audio payload is base64-encoded
56
+ * `stop`) and audio chunks (`media`). The audio payload is base64-encoded
6
57
  * mu-law PCM, delivered in a `payload` field inside the `media` object.
7
58
  *
8
59
  * Outgoing audio is wrapped in a `playAudio` JSON envelope, which is the
@@ -16,10 +67,15 @@ export class PlivoMediaStreamParser {
16
67
  * Parse a raw WebSocket frame from Plivo's audio stream.
17
68
  *
18
69
  * Supported Plivo event types:
19
- * - `start` — stream established; `stream_id` maps to `streamSid`,
70
+ * - `start` -- stream established; `stream_id` maps to `streamSid`,
20
71
  * `call_uuid` maps to `callSid`.
21
- * - `media` — audio chunk; `media.payload` contains base64-encoded mu-law.
22
- * - `stop` — stream ended.
72
+ * - `media` -- audio chunk; `media.payload` contains base64-encoded mu-law
73
+ * PCM bytes.
74
+ * - `stop` -- stream ended (call terminated or stream explicitly closed).
75
+ *
76
+ * Any other event type is silently dropped by returning `null`. Malformed
77
+ * JSON or messages missing required fields (`event`, `stream_id`) also
78
+ * return `null`.
23
79
  *
24
80
  * @param data - Raw WebSocket frame payload (JSON string or Buffer from Plivo).
25
81
  * @returns Normalised {@link MediaStreamIncoming} event, or `null` for
@@ -35,12 +91,15 @@ export class PlivoMediaStreamParser {
35
91
  return null;
36
92
  }
37
93
  const event = msg['event'];
94
+ // Plivo uses `stream_id` as the stream identifier (same naming as Telnyx).
38
95
  const streamSid = msg['stream_id'];
39
96
  if (!event || !streamSid) {
40
97
  return null;
41
98
  }
42
99
  switch (event) {
43
100
  case 'start': {
101
+ // Plivo uses `call_uuid` as its unique call identifier,
102
+ // different from Twilio's `callSid` and Telnyx's `call_control_id`.
44
103
  const callSid = msg['call_uuid'] ?? '';
45
104
  const result = {
46
105
  type: 'start',
@@ -75,12 +134,14 @@ export class PlivoMediaStreamParser {
75
134
  * Encode mu-law audio for transmission back to Plivo.
76
135
  *
77
136
  * Plivo requires audio to be base64-encoded and wrapped in a `playAudio`
78
- * JSON envelope.
137
+ * JSON envelope. Unlike Twilio, the `streamSid` / `stream_id` is NOT
138
+ * included in the outbound message -- Plivo implicitly routes the audio
139
+ * to the caller on the same WebSocket connection.
79
140
  *
80
141
  * @param audio - Raw mu-law PCM bytes to send to the caller.
81
142
  * @param _streamSid - Unused by Plivo's `playAudio` format (accepted for
82
143
  * interface parity with other parsers).
83
- * @returns JSON string conforming to the Plivo `playAudio` envelope.
144
+ * @returns JSON string: `{ event: 'playAudio', media: { payload: '<base64>' } }`
84
145
  */
85
146
  formatOutgoing(audio, _streamSid) {
86
147
  return JSON.stringify({
@@ -1 +1 @@
1
- {"version":3,"file":"PlivoMediaStreamParser.js","sourceRoot":"","sources":["../../../src/voice/parsers/PlivoMediaStreamParser.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;GAYG;AACH,MAAM,OAAO,sBAAsB;IACjC;;;;;;;;;;;;OAYG;IACH,aAAa,CAAC,IAAqB;QACjC,MAAM,GAAG,GAAG,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEpE,IAAI,GAA4B,CAAC;QACjC,IAAI,CAAC;YACH,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAA4B,CAAC;QACnD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAuB,CAAC;QACjD,MAAM,SAAS,GAAG,GAAG,CAAC,WAAW,CAAuB,CAAC;QAEzD,IAAI,CAAC,KAAK,IAAI,CAAC,SAAS,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,QAAQ,KAAK,EAAE,CAAC;YACd,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,MAAM,OAAO,GAAI,GAAG,CAAC,WAAW,CAAwB,IAAI,EAAE,CAAC;gBAC/D,MAAM,MAAM,GAAwB;oBAClC,IAAI,EAAE,OAAO;oBACb,SAAS;oBACT,OAAO;iBACR,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAwC,CAAC;gBAClE,IAAI,CAAC,KAAK;oBAAE,OAAO,IAAI,CAAC;gBAExB,MAAM,UAAU,GAAG,KAAK,CAAC,SAAS,CAAuB,CAAC;gBAC1D,IAAI,CAAC,UAAU;oBAAE,OAAO,IAAI,CAAC;gBAE7B,MAAM,MAAM,GAAwB;oBAClC,IAAI,EAAE,OAAO;oBACb,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,QAAQ,CAAC;oBAC1C,SAAS;iBACV,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,KAAK,MAAM,CAAC,CAAC,CAAC;gBACZ,MAAM,MAAM,GAAwB,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;gBAChE,OAAO,MAAM,CAAC;YAChB,CAAC;YAED;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC;IAED;;;;;;;;;;OAUG;IACH,cAAc,CAAC,KAAa,EAAE,UAAkB;QAC9C,OAAO,IAAI,CAAC,SAAS,CAAC;YACpB,KAAK,EAAE,WAAW;YAClB,KAAK,EAAE,EAAE,OAAO,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE;SAC7C,CAAC,CAAC;IACL,CAAC;CACF"}
1
+ {"version":3,"file":"PlivoMediaStreamParser.js","sourceRoot":"","sources":["../../../src/voice/parsers/PlivoMediaStreamParser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAkDG;AAIH;;;;;;;;;;;;GAYG;AACH,MAAM,OAAO,sBAAsB;IACjC;;;;;;;;;;;;;;;;;OAiBG;IACH,aAAa,CAAC,IAAqB;QACjC,MAAM,GAAG,GAAG,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEpE,IAAI,GAA4B,CAAC;QACjC,IAAI,CAAC;YACH,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAA4B,CAAC;QACnD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAuB,CAAC;QACjD,2EAA2E;QAC3E,MAAM,SAAS,GAAG,GAAG,CAAC,WAAW,CAAuB,CAAC;QAEzD,IAAI,CAAC,KAAK,IAAI,CAAC,SAAS,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,QAAQ,KAAK,EAAE,CAAC;YACd,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,wDAAwD;gBACxD,oEAAoE;gBACpE,MAAM,OAAO,GAAI,GAAG,CAAC,WAAW,CAAwB,IAAI,EAAE,CAAC;gBAC/D,MAAM,MAAM,GAAwB;oBAClC,IAAI,EAAE,OAAO;oBACb,SAAS;oBACT,OAAO;iBACR,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAwC,CAAC;gBAClE,IAAI,CAAC,KAAK;oBAAE,OAAO,IAAI,CAAC;gBAExB,MAAM,UAAU,GAAG,KAAK,CAAC,SAAS,CAAuB,CAAC;gBAC1D,IAAI,CAAC,UAAU;oBAAE,OAAO,IAAI,CAAC;gBAE7B,MAAM,MAAM,GAAwB;oBAClC,IAAI,EAAE,OAAO;oBACb,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,UAAU,EAAE,QAAQ,CAAC;oBAC1C,SAAS;iBACV,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,KAAK,MAAM,CAAC,CAAC,CAAC;gBACZ,MAAM,MAAM,GAAwB,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;gBAChE,OAAO,MAAM,CAAC;YAChB,CAAC;YAED;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC;IAED;;;;;;;;;;;;OAYG;IACH,cAAc,CAAC,KAAa,EAAE,UAAkB;QAC9C,OAAO,IAAI,CAAC,SAAS,CAAC;YACpB,KAAK,EAAE,WAAW;YAClB,KAAK,EAAE,EAAE,OAAO,EAAE,KAAK,CAAC,QAAQ,CAAC,QAAQ,CAAC,EAAE;SAC7C,CAAC,CAAC;IACL,CAAC;CACF"}
@@ -1,15 +1,52 @@
1
+ /**
2
+ * @fileoverview Telnyx media stream WebSocket parser.
3
+ *
4
+ * ## Telnyx's asymmetric protocol
5
+ *
6
+ * Telnyx uses a fundamentally different approach than Twilio for inbound vs.
7
+ * outbound audio on the media stream WebSocket:
8
+ *
9
+ * - **Inbound** (phone -> server): JSON-encoded messages with `event`, `stream_id`,
10
+ * and `media.chunk` (base64 mu-law audio) fields.
11
+ * - **Outbound** (server -> phone): **Raw binary** WebSocket frames containing
12
+ * mu-law PCM bytes directly, with no JSON envelope whatsoever.
13
+ *
14
+ * This asymmetry means {@link formatOutgoing} returns the `Buffer` unchanged,
15
+ * while {@link parseIncoming} parses JSON and base64-decodes the audio payload.
16
+ *
17
+ * ## Field name mapping
18
+ *
19
+ * Telnyx uses snake_case field names that differ from Twilio's conventions.
20
+ * This parser normalises them to the shared {@link MediaStreamIncoming} shape:
21
+ *
22
+ * | Telnyx field | Normalised field |
23
+ * |----------------------|-------------------|
24
+ * | `stream_id` | `streamSid` |
25
+ * | `call_control_id` | `callSid` |
26
+ * | `media.chunk` | `payload` (Buffer)|
27
+ * | `media.track` | (used for filtering, not emitted) |
28
+ *
29
+ * ## DTMF limitation
30
+ *
31
+ * Telnyx does NOT deliver DTMF events over the media stream WebSocket.
32
+ * DTMF key-presses arrive as `call.dtmf.received` HTTP webhook events and
33
+ * must be handled by {@link TelnyxVoiceProvider.parseWebhookEvent} instead.
34
+ *
35
+ * @see {@link https://developers.telnyx.com/docs/voice/media-streaming}
36
+ * @module @framers/agentos/voice/parsers/TelnyxMediaStreamParser
37
+ */
1
38
  import type { MediaStreamParser, MediaStreamIncoming } from '../MediaStreamParser.js';
2
39
  /**
3
40
  * Parses the Telnyx media stream WebSocket protocol.
4
41
  *
5
42
  * Telnyx sends JSON-encoded messages for stream lifecycle events (`start`,
6
- * `stop`) and audio chunks (`media`). Unlike Twilio, Telnyx does NOT deliver
7
- * DTMF events over the media stream WebSocket — those arrive as HTTP webhooks
43
+ * `stop`) and audio chunks (`media`). Unlike Twilio, Telnyx does NOT deliver
44
+ * DTMF events over the media stream WebSocket -- those arrive as HTTP webhooks
8
45
  * to a separate endpoint and must be handled outside this parser.
9
46
  *
10
47
  * Outgoing audio is sent as a **raw binary Buffer** (mu-law PCM bytes without
11
48
  * any JSON envelope) because Telnyx accepts unframed binary WebSocket frames
12
- * directly. No explicit connection acknowledgment is needed after the
49
+ * directly. No explicit connection acknowledgment is needed after the
13
50
  * handshake.
14
51
  *
15
52
  * @see {@link https://developers.telnyx.com/docs/voice/media-streaming}
@@ -19,11 +56,15 @@ export declare class TelnyxMediaStreamParser implements MediaStreamParser {
19
56
  * Parse a raw WebSocket frame from Telnyx's media stream.
20
57
  *
21
58
  * Supported Telnyx event types:
22
- * - `start` — stream established; `stream_id` maps to `streamSid`,
59
+ * - `start` -- stream established; `stream_id` maps to `streamSid`,
23
60
  * `call_control_id` maps to `callSid`.
24
- * - `media` — audio chunk; `chunk` field contains base64-encoded mu-law
25
- * bytes; only `inbound` track frames are returned.
26
- * - `stop` — stream ended.
61
+ * - `media` -- audio chunk; `media.chunk` field contains base64-encoded mu-law
62
+ * bytes; only `inbound` track frames are returned (outbound echoes are
63
+ * discarded to prevent feedback loops).
64
+ * - `stop` -- stream ended (call terminated or stream explicitly closed).
65
+ *
66
+ * Any other event type (e.g., future Telnyx additions, DTMF attempts) is
67
+ * silently dropped by returning `null`.
27
68
  *
28
69
  * @param data - Raw WebSocket frame payload (JSON string or Buffer from Telnyx).
29
70
  * @returns Normalised {@link MediaStreamIncoming} event, or `null` for
@@ -33,7 +74,9 @@ export declare class TelnyxMediaStreamParser implements MediaStreamParser {
33
74
  /**
34
75
  * Encode mu-law audio for transmission back to Telnyx.
35
76
  *
36
- * Telnyx accepts raw binary WebSocket frames; no JSON wrapping is applied.
77
+ * Telnyx accepts raw binary WebSocket frames -- no JSON wrapping is needed.
78
+ * This is the key asymmetry in Telnyx's protocol: inbound is JSON, outbound
79
+ * is raw binary.
37
80
  *
38
81
  * @param audio - Raw mu-law PCM bytes to send to the caller.
39
82
  * @param _streamSid - Unused by Telnyx binary framing (accepted for interface
@@ -44,6 +87,10 @@ export declare class TelnyxMediaStreamParser implements MediaStreamParser {
44
87
  /**
45
88
  * No explicit connection acknowledgment is required by Telnyx.
46
89
  *
90
+ * Unlike Twilio, Telnyx does not need a `connected` handshake message
91
+ * before it starts sending media events.
92
+ *
93
+ * @param _streamSid - Unused (accepted for interface parity).
47
94
  * @returns Always `null`.
48
95
  */
49
96
  formatConnected(_streamSid: string): null;
@@ -1 +1 @@
1
- {"version":3,"file":"TelnyxMediaStreamParser.d.ts","sourceRoot":"","sources":["../../../src/voice/parsers/TelnyxMediaStreamParser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAEtF;;;;;;;;;;;;;;GAcG;AACH,qBAAa,uBAAwB,YAAW,iBAAiB;IAC/D;;;;;;;;;;;;;OAaG;IACH,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,mBAAmB,GAAG,IAAI;IAyDhE;;;;;;;;;OASG;IACH,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,MAAM;IAIzD;;;;OAIG;IACH,eAAe,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI;CAG1C"}
1
+ {"version":3,"file":"TelnyxMediaStreamParser.d.ts","sourceRoot":"","sources":["../../../src/voice/parsers/TelnyxMediaStreamParser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AAEH,OAAO,KAAK,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAEtF;;;;;;;;;;;;;;GAcG;AACH,qBAAa,uBAAwB,YAAW,iBAAiB;IAC/D;;;;;;;;;;;;;;;;;OAiBG;IACH,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,mBAAmB,GAAG,IAAI;IA6DhE;;;;;;;;;;;OAWG;IACH,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,UAAU,EAAE,MAAM,GAAG,MAAM;IAIzD;;;;;;;;OAQG;IACH,eAAe,CAAC,UAAU,EAAE,MAAM,GAAG,IAAI;CAG1C"}
@@ -1,14 +1,51 @@
1
+ /**
2
+ * @fileoverview Telnyx media stream WebSocket parser.
3
+ *
4
+ * ## Telnyx's asymmetric protocol
5
+ *
6
+ * Telnyx uses a fundamentally different approach than Twilio for inbound vs.
7
+ * outbound audio on the media stream WebSocket:
8
+ *
9
+ * - **Inbound** (phone -> server): JSON-encoded messages with `event`, `stream_id`,
10
+ * and `media.chunk` (base64 mu-law audio) fields.
11
+ * - **Outbound** (server -> phone): **Raw binary** WebSocket frames containing
12
+ * mu-law PCM bytes directly, with no JSON envelope whatsoever.
13
+ *
14
+ * This asymmetry means {@link formatOutgoing} returns the `Buffer` unchanged,
15
+ * while {@link parseIncoming} parses JSON and base64-decodes the audio payload.
16
+ *
17
+ * ## Field name mapping
18
+ *
19
+ * Telnyx uses snake_case field names that differ from Twilio's conventions.
20
+ * This parser normalises them to the shared {@link MediaStreamIncoming} shape:
21
+ *
22
+ * | Telnyx field | Normalised field |
23
+ * |----------------------|-------------------|
24
+ * | `stream_id` | `streamSid` |
25
+ * | `call_control_id` | `callSid` |
26
+ * | `media.chunk` | `payload` (Buffer)|
27
+ * | `media.track` | (used for filtering, not emitted) |
28
+ *
29
+ * ## DTMF limitation
30
+ *
31
+ * Telnyx does NOT deliver DTMF events over the media stream WebSocket.
32
+ * DTMF key-presses arrive as `call.dtmf.received` HTTP webhook events and
33
+ * must be handled by {@link TelnyxVoiceProvider.parseWebhookEvent} instead.
34
+ *
35
+ * @see {@link https://developers.telnyx.com/docs/voice/media-streaming}
36
+ * @module @framers/agentos/voice/parsers/TelnyxMediaStreamParser
37
+ */
1
38
  /**
2
39
  * Parses the Telnyx media stream WebSocket protocol.
3
40
  *
4
41
  * Telnyx sends JSON-encoded messages for stream lifecycle events (`start`,
5
- * `stop`) and audio chunks (`media`). Unlike Twilio, Telnyx does NOT deliver
6
- * DTMF events over the media stream WebSocket — those arrive as HTTP webhooks
42
+ * `stop`) and audio chunks (`media`). Unlike Twilio, Telnyx does NOT deliver
43
+ * DTMF events over the media stream WebSocket -- those arrive as HTTP webhooks
7
44
  * to a separate endpoint and must be handled outside this parser.
8
45
  *
9
46
  * Outgoing audio is sent as a **raw binary Buffer** (mu-law PCM bytes without
10
47
  * any JSON envelope) because Telnyx accepts unframed binary WebSocket frames
11
- * directly. No explicit connection acknowledgment is needed after the
48
+ * directly. No explicit connection acknowledgment is needed after the
12
49
  * handshake.
13
50
  *
14
51
  * @see {@link https://developers.telnyx.com/docs/voice/media-streaming}
@@ -18,11 +55,15 @@ export class TelnyxMediaStreamParser {
18
55
  * Parse a raw WebSocket frame from Telnyx's media stream.
19
56
  *
20
57
  * Supported Telnyx event types:
21
- * - `start` — stream established; `stream_id` maps to `streamSid`,
58
+ * - `start` -- stream established; `stream_id` maps to `streamSid`,
22
59
  * `call_control_id` maps to `callSid`.
23
- * - `media` — audio chunk; `chunk` field contains base64-encoded mu-law
24
- * bytes; only `inbound` track frames are returned.
25
- * - `stop` — stream ended.
60
+ * - `media` -- audio chunk; `media.chunk` field contains base64-encoded mu-law
61
+ * bytes; only `inbound` track frames are returned (outbound echoes are
62
+ * discarded to prevent feedback loops).
63
+ * - `stop` -- stream ended (call terminated or stream explicitly closed).
64
+ *
65
+ * Any other event type (e.g., future Telnyx additions, DTMF attempts) is
66
+ * silently dropped by returning `null`.
26
67
  *
27
68
  * @param data - Raw WebSocket frame payload (JSON string or Buffer from Telnyx).
28
69
  * @returns Normalised {@link MediaStreamIncoming} event, or `null` for
@@ -38,12 +79,15 @@ export class TelnyxMediaStreamParser {
38
79
  return null;
39
80
  }
40
81
  const event = msg['event'];
82
+ // Telnyx uses `stream_id` where Twilio uses `streamSid`.
41
83
  const streamSid = msg['stream_id'];
42
84
  if (!event || !streamSid) {
43
85
  return null;
44
86
  }
45
87
  switch (event) {
46
88
  case 'start': {
89
+ // Telnyx uses `call_control_id` as the call-leg identifier,
90
+ // equivalent to Twilio's `callSid`.
47
91
  const callSid = msg['call_control_id'] ?? '';
48
92
  const result = {
49
93
  type: 'start',
@@ -56,10 +100,11 @@ export class TelnyxMediaStreamParser {
56
100
  const media = msg['media'];
57
101
  if (!media)
58
102
  return null;
59
- // Ignore outbound audio echoes from Telnyx.
103
+ // Ignore outbound audio echoes from Telnyx to prevent feedback.
60
104
  const track = media['track'];
61
105
  if (track === 'outbound')
62
106
  return null;
107
+ // Telnyx names its audio payload field `chunk` (not `payload` like Twilio).
63
108
  const chunk = media['chunk'];
64
109
  if (!chunk)
65
110
  return null;
@@ -81,7 +126,9 @@ export class TelnyxMediaStreamParser {
81
126
  /**
82
127
  * Encode mu-law audio for transmission back to Telnyx.
83
128
  *
84
- * Telnyx accepts raw binary WebSocket frames; no JSON wrapping is applied.
129
+ * Telnyx accepts raw binary WebSocket frames -- no JSON wrapping is needed.
130
+ * This is the key asymmetry in Telnyx's protocol: inbound is JSON, outbound
131
+ * is raw binary.
85
132
  *
86
133
  * @param audio - Raw mu-law PCM bytes to send to the caller.
87
134
  * @param _streamSid - Unused by Telnyx binary framing (accepted for interface
@@ -94,6 +141,10 @@ export class TelnyxMediaStreamParser {
94
141
  /**
95
142
  * No explicit connection acknowledgment is required by Telnyx.
96
143
  *
144
+ * Unlike Twilio, Telnyx does not need a `connected` handshake message
145
+ * before it starts sending media events.
146
+ *
147
+ * @param _streamSid - Unused (accepted for interface parity).
97
148
  * @returns Always `null`.
98
149
  */
99
150
  formatConnected(_streamSid) {
@@ -1 +1 @@
1
- {"version":3,"file":"TelnyxMediaStreamParser.js","sourceRoot":"","sources":["../../../src/voice/parsers/TelnyxMediaStreamParser.ts"],"names":[],"mappings":"AAEA;;;;;;;;;;;;;;GAcG;AACH,MAAM,OAAO,uBAAuB;IAClC;;;;;;;;;;;;;OAaG;IACH,aAAa,CAAC,IAAqB;QACjC,MAAM,GAAG,GAAG,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEpE,IAAI,GAA4B,CAAC;QACjC,IAAI,CAAC;YACH,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAA4B,CAAC;QACnD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAuB,CAAC;QACjD,MAAM,SAAS,GAAG,GAAG,CAAC,WAAW,CAAuB,CAAC;QAEzD,IAAI,CAAC,KAAK,IAAI,CAAC,SAAS,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,QAAQ,KAAK,EAAE,CAAC;YACd,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,MAAM,OAAO,GAAI,GAAG,CAAC,iBAAiB,CAAwB,IAAI,EAAE,CAAC;gBACrE,MAAM,MAAM,GAAwB;oBAClC,IAAI,EAAE,OAAO;oBACb,SAAS;oBACT,OAAO;iBACR,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAwC,CAAC;gBAClE,IAAI,CAAC,KAAK;oBAAE,OAAO,IAAI,CAAC;gBAExB,4CAA4C;gBAC5C,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAuB,CAAC;gBACnD,IAAI,KAAK,KAAK,UAAU;oBAAE,OAAO,IAAI,CAAC;gBAEtC,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAuB,CAAC;gBACnD,IAAI,CAAC,KAAK;oBAAE,OAAO,IAAI,CAAC;gBAExB,MAAM,MAAM,GAAwB;oBAClC,IAAI,EAAE,OAAO;oBACb,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC;oBACrC,SAAS;iBACV,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,KAAK,MAAM,CAAC,CAAC,CAAC;gBACZ,MAAM,MAAM,GAAwB,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;gBAChE,OAAO,MAAM,CAAC;YAChB,CAAC;YAED;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC;IAED;;;;;;;;;OASG;IACH,cAAc,CAAC,KAAa,EAAE,UAAkB;QAC9C,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;;;OAIG;IACH,eAAe,CAAC,UAAkB;QAChC,OAAO,IAAI,CAAC;IACd,CAAC;CACF"}
1
+ {"version":3,"file":"TelnyxMediaStreamParser.js","sourceRoot":"","sources":["../../../src/voice/parsers/TelnyxMediaStreamParser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAoCG;AAIH;;;;;;;;;;;;;;GAcG;AACH,MAAM,OAAO,uBAAuB;IAClC;;;;;;;;;;;;;;;;;OAiBG;IACH,aAAa,CAAC,IAAqB;QACjC,MAAM,GAAG,GAAG,OAAO,IAAI,KAAK,QAAQ,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,IAAI,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEpE,IAAI,GAA4B,CAAC;QACjC,IAAI,CAAC;YACH,GAAG,GAAG,IAAI,CAAC,KAAK,CAAC,GAAG,CAA4B,CAAC;QACnD,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,IAAI,CAAC;QACd,CAAC;QAED,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAuB,CAAC;QACjD,yDAAyD;QACzD,MAAM,SAAS,GAAG,GAAG,CAAC,WAAW,CAAuB,CAAC;QAEzD,IAAI,CAAC,KAAK,IAAI,CAAC,SAAS,EAAE,CAAC;YACzB,OAAO,IAAI,CAAC;QACd,CAAC;QAED,QAAQ,KAAK,EAAE,CAAC;YACd,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,4DAA4D;gBAC5D,oCAAoC;gBACpC,MAAM,OAAO,GAAI,GAAG,CAAC,iBAAiB,CAAwB,IAAI,EAAE,CAAC;gBACrE,MAAM,MAAM,GAAwB;oBAClC,IAAI,EAAE,OAAO;oBACb,SAAS;oBACT,OAAO;iBACR,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,KAAK,OAAO,CAAC,CAAC,CAAC;gBACb,MAAM,KAAK,GAAG,GAAG,CAAC,OAAO,CAAwC,CAAC;gBAClE,IAAI,CAAC,KAAK;oBAAE,OAAO,IAAI,CAAC;gBAExB,gEAAgE;gBAChE,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAuB,CAAC;gBACnD,IAAI,KAAK,KAAK,UAAU;oBAAE,OAAO,IAAI,CAAC;gBAEtC,4EAA4E;gBAC5E,MAAM,KAAK,GAAG,KAAK,CAAC,OAAO,CAAuB,CAAC;gBACnD,IAAI,CAAC,KAAK;oBAAE,OAAO,IAAI,CAAC;gBAExB,MAAM,MAAM,GAAwB;oBAClC,IAAI,EAAE,OAAO;oBACb,OAAO,EAAE,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC;oBACrC,SAAS;iBACV,CAAC;gBACF,OAAO,MAAM,CAAC;YAChB,CAAC;YAED,KAAK,MAAM,CAAC,CAAC,CAAC;gBACZ,MAAM,MAAM,GAAwB,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,CAAC;gBAChE,OAAO,MAAM,CAAC;YAChB,CAAC;YAED;gBACE,OAAO,IAAI,CAAC;QAChB,CAAC;IACH,CAAC;IAED;;;;;;;;;;;OAWG;IACH,cAAc,CAAC,KAAa,EAAE,UAAkB;QAC9C,OAAO,KAAK,CAAC;IACf,CAAC;IAED;;;;;;;;OAQG;IACH,eAAe,CAAC,UAAkB;QAChC,OAAO,IAAI,CAAC;IACd,CAAC;CACF"}
@@ -1,10 +1,64 @@
1
+ /**
2
+ * @fileoverview Twilio `<Connect><Stream>` WebSocket media stream parser.
3
+ *
4
+ * ## Twilio media stream protocol
5
+ *
6
+ * When a Twilio call executes the TwiML `<Connect><Stream url="wss://..." />`,
7
+ * Twilio opens a WebSocket to the specified URL and sends **all messages as
8
+ * JSON-encoded strings** (never raw binary). Each message has an `event` field
9
+ * and a `streamSid` field that together identify the event type and stream.
10
+ *
11
+ * ### Inbound JSON message shapes
12
+ *
13
+ * ```
14
+ * ┌─────────────────────────────────────────────────────────────────────┐
15
+ * │ event: "start" │
16
+ * │ streamSid: "MZxxx" │
17
+ * │ start: { callSid, accountSid, mediaFormat: { encoding, ... } } │
18
+ * ├─────────────────────────────────────────────────────────────────────┤
19
+ * │ event: "media" │
20
+ * │ streamSid: "MZxxx" │
21
+ * │ media: { track: "inbound"|"outbound", payload: "<base64>" } │
22
+ * │ sequenceNumber: 42 │
23
+ * ├─────────────────────────────────────────────────────────────────────┤
24
+ * │ event: "dtmf" │
25
+ * │ streamSid: "MZxxx" │
26
+ * │ dtmf: { digit: "5", duration: 500 } │
27
+ * ├─────────────────────────────────────────────────────────────────────┤
28
+ * │ event: "mark" │
29
+ * │ streamSid: "MZxxx" │
30
+ * │ mark: { name: "utterance-done" } │
31
+ * ├─────────────────────────────────────────────────────────────────────┤
32
+ * │ event: "stop" │
33
+ * │ streamSid: "MZxxx" │
34
+ * └─────────────────────────────────────────────────────────────────────┘
35
+ * ```
36
+ *
37
+ * ### Outbound audio format
38
+ *
39
+ * Audio sent back to Twilio must be wrapped in a JSON `media` envelope:
40
+ * ```json
41
+ * { "event": "media", "streamSid": "MZxxx", "media": { "payload": "<base64>" } }
42
+ * ```
43
+ *
44
+ * ### Connection acknowledgment
45
+ *
46
+ * Immediately after the WebSocket handshake, the server must send:
47
+ * ```json
48
+ * { "event": "connected", "protocol": "Call", "version": "1.0.0" }
49
+ * ```
50
+ * This tells Twilio the listener is ready to receive media.
51
+ *
52
+ * @see {@link https://www.twilio.com/docs/voice/twiml/stream}
53
+ * @module @framers/agentos/voice/parsers/TwilioMediaStreamParser
54
+ */
1
55
  import type { MediaStreamParser, MediaStreamIncoming } from '../MediaStreamParser.js';
2
56
  /**
3
57
  * Parses the Twilio `<Connect><Stream>` WebSocket media stream protocol.
4
58
  *
5
- * Twilio sends all messages as JSON-encoded strings. Outbound audio is
59
+ * Twilio sends all messages as JSON-encoded strings. Outbound audio is
6
60
  * wrapped in the same JSON envelope so Twilio can associate it with the
7
- * correct stream. An explicit `connected` acknowledgment is sent once
61
+ * correct stream. An explicit `connected` acknowledgment is sent once
8
62
  * immediately after the WebSocket handshake to signal that the listener is
9
63
  * ready to receive media.
10
64
  *
@@ -15,11 +69,15 @@ export declare class TwilioMediaStreamParser implements MediaStreamParser {
15
69
  * Parse a raw WebSocket frame from Twilio's media stream.
16
70
  *
17
71
  * Supported Twilio event types:
18
- * - `start` stream established, includes callSid
19
- * - `media` audio chunk (inbound track only; outbound chunks are ignored)
20
- * - `dtmf` DTMF keypress detected
21
- * - `stop` stream ended
22
- * - `mark` named synchronisation marker
72
+ * - `start` -- stream established, includes callSid and media format metadata.
73
+ * - `media` -- audio chunk (inbound track only; outbound echoes are discarded
74
+ * to prevent feedback loops).
75
+ * - `dtmf` -- DTMF keypress detected on the audio stream.
76
+ * - `stop` -- stream ended (call hangup or stream disconnect).
77
+ * - `mark` -- named synchronisation marker confirming playback reached a point.
78
+ *
79
+ * Messages with missing `event` or `streamSid` fields, malformed JSON,
80
+ * or unrecognised event types are silently dropped (return `null`).
23
81
  *
24
82
  * @param data - Raw WebSocket frame payload (always a JSON string from Twilio).
25
83
  * @returns Normalised {@link MediaStreamIncoming} event, or `null` for
@@ -30,20 +88,24 @@ export declare class TwilioMediaStreamParser implements MediaStreamParser {
30
88
  * Encode mu-law audio for transmission back to the Twilio stream.
31
89
  *
32
90
  * Twilio requires base64-encoded audio wrapped in a JSON `media` envelope
33
- * so it can route the audio to the correct stream.
91
+ * so it can route the audio to the correct stream by `streamSid`.
34
92
  *
35
93
  * @param audio - Raw mu-law PCM bytes to send to the caller.
36
94
  * @param streamSid - The stream identifier to include in the envelope.
37
- * @returns JSON string conforming to the Twilio media-out envelope format.
95
+ * @returns JSON string conforming to the Twilio media-out envelope format:
96
+ * `{ event: 'media', streamSid: '...', media: { payload: '<base64>' } }`
38
97
  */
39
98
  formatOutgoing(audio: Buffer, streamSid: string): string;
40
99
  /**
41
100
  * Generate the initial `connected` acknowledgment expected by Twilio
42
101
  * immediately after the WebSocket connection is established.
43
102
  *
44
- * @param _streamSid - Unused — Twilio does not require the stream ID in the
103
+ * Without this message, Twilio waits indefinitely for a response and
104
+ * eventually times out the stream connection.
105
+ *
106
+ * @param _streamSid - Unused -- Twilio does not require the stream ID in the
45
107
  * `connected` message, but the parameter is accepted for interface parity.
46
- * @returns JSON string with the `connected` envelope.
108
+ * @returns JSON string: `{ event: 'connected', protocol: 'Call', version: '1.0.0' }`
47
109
  */
48
110
  formatConnected(_streamSid: string): string;
49
111
  }
@@ -1 +1 @@
1
- {"version":3,"file":"TwilioMediaStreamParser.d.ts","sourceRoot":"","sources":["../../../src/voice/parsers/TwilioMediaStreamParser.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAEtF;;;;;;;;;;GAUG;AACH,qBAAa,uBAAwB,YAAW,iBAAiB;IAC/D;;;;;;;;;;;;;OAaG;IACH,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,mBAAmB,GAAG,IAAI;IA+FhE;;;;;;;;;OASG;IACH,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM;IAQxD;;;;;;;OAOG;IACH,eAAe,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM;CAO5C"}
1
+ {"version":3,"file":"TwilioMediaStreamParser.d.ts","sourceRoot":"","sources":["../../../src/voice/parsers/TwilioMediaStreamParser.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAqDG;AAEH,OAAO,KAAK,EAAE,iBAAiB,EAAE,mBAAmB,EAAE,MAAM,yBAAyB,CAAC;AAEtF;;;;;;;;;;GAUG;AACH,qBAAa,uBAAwB,YAAW,iBAAiB;IAC/D;;;;;;;;;;;;;;;;;OAiBG;IACH,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,MAAM,GAAG,mBAAmB,GAAG,IAAI;IAsGhE;;;;;;;;;;OAUG;IACH,cAAc,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,GAAG,MAAM;IAQxD;;;;;;;;;;OAUG;IACH,eAAe,CAAC,UAAU,EAAE,MAAM,GAAG,MAAM;CAO5C"}
@@ -1,9 +1,63 @@
1
+ /**
2
+ * @fileoverview Twilio `<Connect><Stream>` WebSocket media stream parser.
3
+ *
4
+ * ## Twilio media stream protocol
5
+ *
6
+ * When a Twilio call executes the TwiML `<Connect><Stream url="wss://..." />`,
7
+ * Twilio opens a WebSocket to the specified URL and sends **all messages as
8
+ * JSON-encoded strings** (never raw binary). Each message has an `event` field
9
+ * and a `streamSid` field that together identify the event type and stream.
10
+ *
11
+ * ### Inbound JSON message shapes
12
+ *
13
+ * ```
14
+ * ┌─────────────────────────────────────────────────────────────────────┐
15
+ * │ event: "start" │
16
+ * │ streamSid: "MZxxx" │
17
+ * │ start: { callSid, accountSid, mediaFormat: { encoding, ... } } │
18
+ * ├─────────────────────────────────────────────────────────────────────┤
19
+ * │ event: "media" │
20
+ * │ streamSid: "MZxxx" │
21
+ * │ media: { track: "inbound"|"outbound", payload: "<base64>" } │
22
+ * │ sequenceNumber: 42 │
23
+ * ├─────────────────────────────────────────────────────────────────────┤
24
+ * │ event: "dtmf" │
25
+ * │ streamSid: "MZxxx" │
26
+ * │ dtmf: { digit: "5", duration: 500 } │
27
+ * ├─────────────────────────────────────────────────────────────────────┤
28
+ * │ event: "mark" │
29
+ * │ streamSid: "MZxxx" │
30
+ * │ mark: { name: "utterance-done" } │
31
+ * ├─────────────────────────────────────────────────────────────────────┤
32
+ * │ event: "stop" │
33
+ * │ streamSid: "MZxxx" │
34
+ * └─────────────────────────────────────────────────────────────────────┘
35
+ * ```
36
+ *
37
+ * ### Outbound audio format
38
+ *
39
+ * Audio sent back to Twilio must be wrapped in a JSON `media` envelope:
40
+ * ```json
41
+ * { "event": "media", "streamSid": "MZxxx", "media": { "payload": "<base64>" } }
42
+ * ```
43
+ *
44
+ * ### Connection acknowledgment
45
+ *
46
+ * Immediately after the WebSocket handshake, the server must send:
47
+ * ```json
48
+ * { "event": "connected", "protocol": "Call", "version": "1.0.0" }
49
+ * ```
50
+ * This tells Twilio the listener is ready to receive media.
51
+ *
52
+ * @see {@link https://www.twilio.com/docs/voice/twiml/stream}
53
+ * @module @framers/agentos/voice/parsers/TwilioMediaStreamParser
54
+ */
1
55
  /**
2
56
  * Parses the Twilio `<Connect><Stream>` WebSocket media stream protocol.
3
57
  *
4
- * Twilio sends all messages as JSON-encoded strings. Outbound audio is
58
+ * Twilio sends all messages as JSON-encoded strings. Outbound audio is
5
59
  * wrapped in the same JSON envelope so Twilio can associate it with the
6
- * correct stream. An explicit `connected` acknowledgment is sent once
60
+ * correct stream. An explicit `connected` acknowledgment is sent once
7
61
  * immediately after the WebSocket handshake to signal that the listener is
8
62
  * ready to receive media.
9
63
  *
@@ -14,11 +68,15 @@ export class TwilioMediaStreamParser {
14
68
  * Parse a raw WebSocket frame from Twilio's media stream.
15
69
  *
16
70
  * Supported Twilio event types:
17
- * - `start` stream established, includes callSid
18
- * - `media` audio chunk (inbound track only; outbound chunks are ignored)
19
- * - `dtmf` DTMF keypress detected
20
- * - `stop` stream ended
21
- * - `mark` named synchronisation marker
71
+ * - `start` -- stream established, includes callSid and media format metadata.
72
+ * - `media` -- audio chunk (inbound track only; outbound echoes are discarded
73
+ * to prevent feedback loops).
74
+ * - `dtmf` -- DTMF keypress detected on the audio stream.
75
+ * - `stop` -- stream ended (call hangup or stream disconnect).
76
+ * - `mark` -- named synchronisation marker confirming playback reached a point.
77
+ *
78
+ * Messages with missing `event` or `streamSid` fields, malformed JSON,
79
+ * or unrecognised event types are silently dropped (return `null`).
22
80
  *
23
81
  * @param data - Raw WebSocket frame payload (always a JSON string from Twilio).
24
82
  * @returns Normalised {@link MediaStreamIncoming} event, or `null` for
@@ -35,12 +93,14 @@ export class TwilioMediaStreamParser {
35
93
  }
36
94
  const event = msg['event'];
37
95
  const streamSid = msg['streamSid'];
96
+ // Both fields are required on every Twilio media stream message.
38
97
  if (!event || !streamSid) {
39
98
  return null;
40
99
  }
41
100
  switch (event) {
42
101
  case 'start': {
43
102
  const startPayload = msg['start'];
103
+ // callSid identifies the Twilio call leg this stream belongs to.
44
104
  const callSid = startPayload?.['callSid'] ?? '';
45
105
  const result = {
46
106
  type: 'start',
@@ -54,7 +114,9 @@ export class TwilioMediaStreamParser {
54
114
  const media = msg['media'];
55
115
  if (!media)
56
116
  return null;
57
- // Only process inbound audio outbound echoes must be discarded.
117
+ // Twilio sends both inbound and outbound audio on the same stream.
118
+ // Outbound echoes must be discarded to prevent feedback loops where
119
+ // the agent hears its own TTS output.
58
120
  const track = media['track'];
59
121
  if (track === 'outbound')
60
122
  return null;
@@ -79,6 +141,7 @@ export class TwilioMediaStreamParser {
79
141
  const digit = dtmf['digit'];
80
142
  if (!digit)
81
143
  return null;
144
+ // Twilio reports DTMF key-hold duration in milliseconds.
82
145
  const duration = typeof dtmf['duration'] === 'number'
83
146
  ? dtmf['duration']
84
147
  : undefined;
@@ -105,6 +168,8 @@ export class TwilioMediaStreamParser {
105
168
  return result;
106
169
  }
107
170
  default:
171
+ // Twilio may add new event types in the future; silently ignore them
172
+ // rather than throwing so existing deployments remain forward-compatible.
108
173
  return null;
109
174
  }
110
175
  }
@@ -112,11 +177,12 @@ export class TwilioMediaStreamParser {
112
177
  * Encode mu-law audio for transmission back to the Twilio stream.
113
178
  *
114
179
  * Twilio requires base64-encoded audio wrapped in a JSON `media` envelope
115
- * so it can route the audio to the correct stream.
180
+ * so it can route the audio to the correct stream by `streamSid`.
116
181
  *
117
182
  * @param audio - Raw mu-law PCM bytes to send to the caller.
118
183
  * @param streamSid - The stream identifier to include in the envelope.
119
- * @returns JSON string conforming to the Twilio media-out envelope format.
184
+ * @returns JSON string conforming to the Twilio media-out envelope format:
185
+ * `{ event: 'media', streamSid: '...', media: { payload: '<base64>' } }`
120
186
  */
121
187
  formatOutgoing(audio, streamSid) {
122
188
  return JSON.stringify({
@@ -129,9 +195,12 @@ export class TwilioMediaStreamParser {
129
195
  * Generate the initial `connected` acknowledgment expected by Twilio
130
196
  * immediately after the WebSocket connection is established.
131
197
  *
132
- * @param _streamSid - Unused — Twilio does not require the stream ID in the
198
+ * Without this message, Twilio waits indefinitely for a response and
199
+ * eventually times out the stream connection.
200
+ *
201
+ * @param _streamSid - Unused -- Twilio does not require the stream ID in the
133
202
  * `connected` message, but the parameter is accepted for interface parity.
134
- * @returns JSON string with the `connected` envelope.
203
+ * @returns JSON string: `{ event: 'connected', protocol: 'Call', version: '1.0.0' }`
135
204
  */
136
205
  formatConnected(_streamSid) {
137
206
  return JSON.stringify({