@cloudflare/voice-telnyx 0.0.0 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,160 @@
1
+ # @cloudflare/voice-telnyx
2
+
3
+ Telnyx voice providers for the Cloudflare Agents voice pipeline.
4
+
5
+ This package includes:
6
+
7
+ - **STT** — real-time speech-to-text via Telnyx's WebSocket transcription API.
8
+ - **TTS** — text-to-speech via Telnyx REST and Workers WebSocket backends.
9
+ - **Telephony** — Telnyx WebRTC/PSTN helpers for routing phone-call audio into a `@cloudflare/voice` agent.
10
+
11
+ ## Installation
12
+
13
+ ```bash
14
+ npm install @cloudflare/voice @cloudflare/voice-telnyx
15
+ ```
16
+
17
+ ## Subpath imports
18
+
19
+ The package root is server-safe and does not import browser WebRTC code. Use
20
+ `/browser` only when you need the browser-side PSTN bridge:
21
+
22
+ ```ts
23
+ import { TelnyxSTT } from "@cloudflare/voice-telnyx/stt";
24
+ import { TelnyxTTS } from "@cloudflare/voice-telnyx/tts";
25
+ import { TelnyxJWTEndpoint } from "@cloudflare/voice-telnyx";
26
+ import { TelnyxCallBridge } from "@cloudflare/voice-telnyx/browser";
27
+ ```
28
+
29
+ ## Worker voice agent
30
+
31
+ ```ts
32
+ import { Agent, routeAgentRequest } from "agents";
33
+ import { withVoice, type VoiceTurnContext } from "@cloudflare/voice";
34
+ import { TelnyxSTT } from "@cloudflare/voice-telnyx/stt";
35
+ import { TelnyxTTS } from "@cloudflare/voice-telnyx/tts";
36
+
37
+ const VoiceAgent = withVoice(Agent);
38
+
39
+ export class MyVoiceAgent extends VoiceAgent<Env> {
40
+ transcriber = new TelnyxSTT({ apiKey: this.env.TELNYX_API_KEY });
41
+ tts = new TelnyxTTS({
42
+ apiKey: this.env.TELNYX_API_KEY,
43
+ voice: "Telnyx.NaturalHD.astra"
44
+ });
45
+
46
+ async onTurn(transcript: string, context: VoiceTurnContext) {
47
+ return `You said: ${transcript}`;
48
+ }
49
+ }
50
+
51
+ export default {
52
+ async fetch(request: Request, env: Env) {
53
+ return (
54
+ (await routeAgentRequest(request, env)) ||
55
+ new Response("Not found", { status: 404 })
56
+ );
57
+ }
58
+ };
59
+ ```
60
+
61
+ ## API
62
+
63
+ ### `TelnyxSTT`
64
+
65
+ Implements `Transcriber` from `@cloudflare/voice`.
66
+
67
+ ```ts
68
+ const stt = new TelnyxSTT({
69
+ apiKey: env.TELNYX_API_KEY,
70
+ engine: "Telnyx", // or "Deepgram"
71
+ language: "en",
72
+ transcriptionModel: "nova-3", // optional, useful with Deepgram engine
73
+ interimResults: true
74
+ });
75
+ ```
76
+
77
+ The Cloudflare voice pipeline feeds raw 16 kHz mono PCM16 audio. Telnyx STT expects audio in a container format, so the default `inputFormat: "wav"` prepends a WAV header before streaming audio chunks.
78
+
79
+ ### `TelnyxTTS`
80
+
81
+ Implements `TTSProvider` and `StreamingTTSProvider` from `@cloudflare/voice`.
82
+
83
+ ```ts
84
+ const tts = new TelnyxTTS({
85
+ apiKey: env.TELNYX_API_KEY,
86
+ voice: "Telnyx.NaturalHD.astra",
87
+ backend: "rest" // default; use "websocket" only in Workers runtime
88
+ });
89
+ ```
90
+
91
+ - `backend: "rest"` works anywhere and returns one complete audio buffer per sentence.
92
+ - `backend: "websocket"` streams chunks with lower time-to-first-audio, but requires Cloudflare Workers' fetch-upgrade WebSocket pattern because authentication is sent in request headers, which the standard `WebSocket` constructor cannot set.
93
+
94
+ ### Telephony / PSTN bridge
95
+
96
+ Server-side token helpers are available from the package root. Browser-side
97
+ WebRTC/PSTN helpers are exported from `@cloudflare/voice-telnyx/browser`.
98
+
99
+ ```ts
100
+ import { TelnyxJWTEndpoint } from "@cloudflare/voice-telnyx";
101
+ import {
102
+ createTelnyxVoiceConfig,
103
+ TelnyxPhoneClient
104
+ } from "@cloudflare/voice-telnyx/browser";
105
+ import { WebSocketVoiceTransport } from "@cloudflare/voice/client";
106
+ ```
107
+
108
+ Create a server-side endpoint that keeps your Telnyx API key secret. The endpoint requires an `authorize` callback by default so a public route cannot mint Telnyx credentials for arbitrary clients:
109
+
110
+ ```ts
111
+ const jwt = new TelnyxJWTEndpoint({
112
+ apiKey: env.TELNYX_API_KEY,
113
+ credentialConnectionId: env.TELNYX_CREDENTIAL_CONNECTION_ID,
114
+ allowedOrigins: ["https://your-app.example"],
115
+ authorize: async (request) => {
116
+ // Check your app session, signed token, or other auth state here.
117
+ return Boolean(request.headers.get("Authorization"));
118
+ }
119
+ });
120
+
121
+ return jwt.handleRequest(request);
122
+ ```
123
+
124
+ Create a browser bridge and connect it with `TelnyxPhoneClient`:
125
+
126
+ ```ts
127
+ const telnyx = await createTelnyxVoiceConfig({
128
+ jwtEndpoint: "/api/telnyx-token",
129
+ autoAnswer: true
130
+ });
131
+
132
+ const client = new TelnyxPhoneClient({
133
+ transport: new WebSocketVoiceTransport({ agent: "my-voice-agent" }),
134
+ bridge: telnyx.bridge
135
+ });
136
+ ```
137
+
138
+ `TelnyxPhoneClient` sends phone-call audio to the agent as PCM16. For playback,
139
+ it routes PCM16 responses directly to the phone bridge and decodes formats such
140
+ as MP3 in the browser before playback. If you use the lower-level
141
+ `TelnyxPhoneTransport`, configure your server agent to send PCM16 audio because
142
+ that transport does not decode non-PCM formats.
143
+
144
+ ## Environment variables
145
+
146
+ | Variable | Required | Description |
147
+ | --------------------------------- | -------------- | ----------------------------------------------------------------------------- |
148
+ | `TELNYX_API_KEY` | Yes | Telnyx API key. Store as a Worker secret. |
149
+ | `TELNYX_CREDENTIAL_CONNECTION_ID` | Telephony only | Credential connection ID used by `TelnyxJWTEndpoint` for WebRTC login tokens. |
150
+
151
+ Set secrets with Wrangler:
152
+
153
+ ```bash
154
+ wrangler secret put TELNYX_API_KEY
155
+ wrangler secret put TELNYX_CREDENTIAL_CONNECTION_ID
156
+ ```
157
+
158
+ ## Attribution
159
+
160
+ This package is adapted from Telnyx's `@telnyx/voice-cloudflare` implementation, whose npm package metadata declares the MIT license.
@@ -0,0 +1,304 @@
1
+ import {
2
+ TranscriptMessage,
3
+ VoiceAudioFormat,
4
+ VoiceAudioInput,
5
+ VoicePipelineMetrics,
6
+ VoiceStatus,
7
+ VoiceTransport
8
+ } from "@cloudflare/voice/client";
9
+
10
+ //#region src/providers/call-bridge.d.ts
11
+ /**
12
+ * Configuration for the TelnyxCallBridge.
13
+ *
14
+ * Uses JWT authentication (browser-side). The JWT is generated
15
+ * server-side from a Telnyx API key + credential connection.
16
+ */
17
+ interface TelnyxCallBridgeConfig {
18
+ /** JWT token from the Telnyx telephony credentials API. */
19
+ loginToken: string;
20
+ /** Automatically answer inbound calls. @default false */
21
+ autoAnswer?: boolean;
22
+ /** Enable debug logging. @default false */
23
+ debug?: boolean;
24
+ }
25
+ /**
26
+ * Bridges Telnyx phone calls into the Cloudflare voice pipeline.
27
+ *
28
+ * Implements `VoiceAudioInput` from @cloudflare/voice/client — extracts PCM
29
+ * audio from inbound phone calls and feeds it to the AI pipeline.
30
+ * Also provides `playAudio()` for injecting response audio back
31
+ * into the phone call.
32
+ *
33
+ * Usage:
34
+ * ```typescript
35
+ * const bridge = new TelnyxCallBridge({ loginToken: jwt });
36
+ * const voiceClient = new VoiceClient({
37
+ * agent: "my-agent",
38
+ * audioInput: bridge,
39
+ * });
40
+ * ```
41
+ */
42
+ declare class TelnyxCallBridge implements VoiceAudioInput {
43
+ onAudioLevel: ((rms: number) => void) | null;
44
+ onAudioData?: ((pcm: ArrayBuffer) => void) | null;
45
+ private readonly config;
46
+ private _connected;
47
+ private _activeCall;
48
+ private client;
49
+ private captureContext;
50
+ private captureSource;
51
+ private captureWorklet;
52
+ private captureBlobUrl;
53
+ private captureAudioEl;
54
+ private statsInterval;
55
+ private playbackContext;
56
+ private playbackWorklet;
57
+ private playbackBlobUrl;
58
+ private startPromise;
59
+ private finishStart;
60
+ private startAttempt;
61
+ private mediaSetupAttempt;
62
+ constructor(config: TelnyxCallBridgeConfig);
63
+ /** Whether the Telnyx client is connected to the platform. */
64
+ get connected(): boolean;
65
+ /** The currently active Telnyx call, or null. */
66
+ get activeCall(): unknown | null;
67
+ /** Connect to Telnyx and start listening for calls. */
68
+ start(): Promise<void>;
69
+ /** Answer the current inbound call. */
70
+ answer(): void;
71
+ /** End the active call. */
72
+ hangup(): void;
73
+ /**
74
+ * Initiate an outbound PSTN call.
75
+ * @param destination Phone number or SIP URI to call.
76
+ * @param callerNumber Optional caller ID number to present.
77
+ * @returns The Telnyx Call object.
78
+ */
79
+ dial(destination: string, callerNumber?: string): unknown;
80
+ /** Send DTMF digits on the active call. */
81
+ sendDTMF(digits: string): void;
82
+ /**
83
+ * Clear any buffered audio in the playback pipeline.
84
+ * Used during interrupt detection to stop stale audio from playing.
85
+ */
86
+ clearPlaybackBuffer(): void;
87
+ /**
88
+ * Inject PCM audio into the active phone call (agent → caller).
89
+ * Accepts 16kHz mono Int16 PCM. Upsamples to 48kHz for WebRTC.
90
+ * No-op if no active call.
91
+ */
92
+ playAudio(pcm: ArrayBuffer): void;
93
+ /** Disconnect from Telnyx and clean up all resources. */
94
+ stop(): void;
95
+ private handleNotification;
96
+ private isCurrentMediaSetup;
97
+ private cleanupCaptureResources;
98
+ private cleanupPlaybackResources;
99
+ private startAudioCapture;
100
+ private monitorInboundStats;
101
+ private stopAudioCapture;
102
+ private startAudioPlayback;
103
+ private stopAudioPlayback;
104
+ }
105
+ //#endregion
106
+ //#region src/phone-client.d.ts
107
+ interface TelnyxPhoneClientConfig {
108
+ /** The transport for server communication (e.g. WebSocketVoiceTransport). */
109
+ transport: VoiceTransport;
110
+ /** The call bridge for PSTN audio I/O. */
111
+ bridge: TelnyxCallBridge;
112
+ /**
113
+ * Preferred audio format sent to the server in `start_call`.
114
+ * Must be `"pcm16"` for TelnyxCallBridge compatibility.
115
+ * @default "pcm16"
116
+ */
117
+ preferredFormat?: VoiceAudioFormat;
118
+ /** RMS threshold below which audio is considered silence. @default 0.04 */
119
+ silenceThreshold?: number;
120
+ /** How long silence must last before sending end_of_speech (ms). @default 500 */
121
+ silenceDurationMs?: number;
122
+ /** RMS threshold for detecting user speech during agent playback. @default 0.05 */
123
+ interruptThreshold?: number;
124
+ /** Consecutive high-RMS chunks needed to trigger an interrupt. @default 2 */
125
+ interruptChunks?: number;
126
+ /** Maximum transcript messages to keep in memory. @default 200 */
127
+ maxTranscriptMessages?: number;
128
+ }
129
+ interface TelnyxPhoneClientEventMap {
130
+ statuschange: VoiceStatus;
131
+ transcriptchange: TranscriptMessage[];
132
+ interimtranscript: string | null;
133
+ metricschange: VoicePipelineMetrics | null;
134
+ audiolevelchange: number;
135
+ connectionchange: boolean;
136
+ error: string | null;
137
+ mutechange: boolean;
138
+ custommessage: unknown;
139
+ }
140
+ type TelnyxPhoneClientEvent = keyof TelnyxPhoneClientEventMap;
141
+ declare class TelnyxPhoneClient {
142
+ private _status;
143
+ private _transcript;
144
+ private _metrics;
145
+ private _audioLevel;
146
+ private _isMuted;
147
+ private _connected;
148
+ private _error;
149
+ private _interimTranscript;
150
+ private _lastCustomMessage;
151
+ private _audioFormat;
152
+ private _serverProtocolVersion;
153
+ private inCall;
154
+ private isPlaying;
155
+ private isSpeaking;
156
+ private silenceTimer;
157
+ private interruptChunkCount;
158
+ private warnedFormat;
159
+ private listeners;
160
+ private transport;
161
+ private bridge;
162
+ private preferredFormat;
163
+ private silenceThreshold;
164
+ private silenceDurationMs;
165
+ private interruptThreshold;
166
+ private interruptChunks;
167
+ private maxTranscriptMessages;
168
+ constructor(config: TelnyxPhoneClientConfig);
169
+ get status(): VoiceStatus;
170
+ get transcript(): TranscriptMessage[];
171
+ get metrics(): VoicePipelineMetrics | null;
172
+ get audioLevel(): number;
173
+ get isMuted(): boolean;
174
+ get connected(): boolean;
175
+ get error(): string | null;
176
+ get interimTranscript(): string | null;
177
+ get lastCustomMessage(): unknown;
178
+ get audioFormat(): VoiceAudioFormat | null;
179
+ get serverProtocolVersion(): number | null;
180
+ addEventListener<K extends TelnyxPhoneClientEvent>(
181
+ event: K,
182
+ listener: (data: TelnyxPhoneClientEventMap[K]) => void
183
+ ): void;
184
+ removeEventListener<K extends TelnyxPhoneClientEvent>(
185
+ event: K,
186
+ listener: (data: TelnyxPhoneClientEventMap[K]) => void
187
+ ): void;
188
+ private emit;
189
+ /** Open the transport connection and send the protocol handshake. */
190
+ connect(): void;
191
+ /** End any active call, then close the transport. */
192
+ disconnect(): void;
193
+ /**
194
+ * Start a voice call. Wires up the bridge audio callbacks,
195
+ * starts the bridge, and sends `start_call` to the server.
196
+ *
197
+ * The bridge's `start()` is called here — do not call it separately.
198
+ */
199
+ startCall(): Promise<void>;
200
+ /**
201
+ * End the voice call. Detaches audio callbacks from the bridge
202
+ * and sends `end_call` to the server.
203
+ *
204
+ * Does NOT stop the bridge or hang up the phone — call `bridge.stop()`
205
+ * or the `cleanup()` returned by `createTelnyxVoiceConfig` for that.
206
+ */
207
+ endCall(): void;
208
+ /** Toggle mute. When muted, audio is not sent to the server. */
209
+ toggleMute(): void;
210
+ /** Send a text message to the agent (bypasses STT, goes to onTurn). */
211
+ sendText(text: string): void;
212
+ /** Send arbitrary JSON to the agent (app-level messages). */
213
+ sendJSON(data: Record<string, unknown>): void;
214
+ private createStartCallMessage;
215
+ private handleJSON;
216
+ private handleAudio;
217
+ private decodeContext;
218
+ private decodeAndPlay;
219
+ private processAudioLevel;
220
+ private resetDetection;
221
+ private trimTranscript;
222
+ }
223
+ //#endregion
224
+ //#region src/transport/phone-transport.d.ts
225
+ interface TelnyxPhoneTransportConfig {
226
+ /**
227
+ * The underlying transport to wrap. Typically a `WebSocketVoiceTransport`
228
+ * from @cloudflare/voice/client.
229
+ */
230
+ inner: VoiceTransport;
231
+ /** The call bridge to route audio into. */
232
+ bridge: TelnyxCallBridge;
233
+ /**
234
+ * Optional callback for every binary audio frame received from the server.
235
+ * Called with the raw ArrayBuffer regardless of format.
236
+ */
237
+ onServerAudio?: (audio: ArrayBuffer) => void;
238
+ }
239
+ declare class TelnyxPhoneTransport implements VoiceTransport {
240
+ private inner;
241
+ private bridge;
242
+ private audioFormat;
243
+ private warnedFormat;
244
+ private userAudioCallback?;
245
+ onopen: (() => void) | null;
246
+ onclose: (() => void) | null;
247
+ onerror: ((error?: unknown) => void) | null;
248
+ onmessage: ((data: string | ArrayBuffer | Blob) => void) | null;
249
+ constructor(config: TelnyxPhoneTransportConfig);
250
+ get connected(): boolean;
251
+ sendJSON(data: Record<string, unknown>): void;
252
+ sendBinary(data: ArrayBuffer): void;
253
+ connect(): void;
254
+ disconnect(): void;
255
+ private intercept;
256
+ /** Parse audio_config messages to know what format the server is sending. */
257
+ private trackAudioConfig;
258
+ /** Fork audio to the bridge (pcm16 only) and optional user callback. */
259
+ private routeAudio;
260
+ }
261
+ //#endregion
262
+ //#region src/helpers/transport-config.d.ts
263
+ interface TelnyxVoiceConfigOptions {
264
+ /** URL of the JWT endpoint (the TelnyxJWTEndpoint handler). */
265
+ jwtEndpoint: string;
266
+ /** Automatically answer inbound calls. @default false */
267
+ autoAnswer?: boolean;
268
+ /** Enable Telnyx SDK debug logging. @default false */
269
+ debug?: boolean;
270
+ }
271
+ interface TelnyxVoiceSetup {
272
+ /** The TelnyxCallBridge instance — use for playAudio(), dial(), hangup(), etc. */
273
+ bridge: TelnyxCallBridge;
274
+ /** Pass this to VoiceClientOptions.audioInput. Same as `bridge`. */
275
+ audioInput: TelnyxCallBridge;
276
+ /** The server-side credential ID (for manual cleanup if needed). */
277
+ credentialId: string;
278
+ /** The SIP username (e.g. "genCredXYZ123") — dial this to reach the agent. */
279
+ sipUsername: string;
280
+ /** Stop the bridge and revoke the server-side credential. */
281
+ cleanup: () => Promise<void>;
282
+ }
283
+ /**
284
+ * Fetch a JWT from the server, create a TelnyxCallBridge, and return
285
+ * everything needed to configure a VoiceClient for phone calls.
286
+ */
287
+ declare function createTelnyxVoiceConfig(
288
+ options: TelnyxVoiceConfigOptions
289
+ ): Promise<TelnyxVoiceSetup>;
290
+ //#endregion
291
+ export {
292
+ TelnyxCallBridge,
293
+ type TelnyxCallBridgeConfig,
294
+ TelnyxPhoneClient,
295
+ type TelnyxPhoneClientConfig,
296
+ type TelnyxPhoneClientEvent,
297
+ type TelnyxPhoneClientEventMap,
298
+ TelnyxPhoneTransport,
299
+ type TelnyxPhoneTransportConfig,
300
+ type TelnyxVoiceConfigOptions,
301
+ type TelnyxVoiceSetup,
302
+ createTelnyxVoiceConfig
303
+ };
304
+ //# sourceMappingURL=browser.d.ts.map