@kognitivedev/voice-media-bridge 0.2.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/routes.js ADDED
@@ -0,0 +1,98 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.createPhoneAudioRoute = createPhoneAudioRoute;
4
+ exports.resolvePhoneAudioRoute = resolvePhoneAudioRoute;
5
+ exports.phoneAudioRouteMetadata = phoneAudioRouteMetadata;
6
+ exports.getStableCarrierProfile = getStableCarrierProfile;
7
+ const profiles_1 = require("./profiles");
8
/**
 * Coerces an arbitrary value to a supported AI provider id.
 *
 * "xai-realtime" and "gemini-live" are returned unchanged; every other value
 * (including undefined) falls back to "openai-realtime".
 */
function normalizeAiProvider(value) {
    switch (value) {
        case "xai-realtime":
        case "gemini-live":
            return value;
        default:
            return "openai-realtime";
    }
}
11
/**
 * Builds the descriptor for a raw PCM audio leg at the given sample rate.
 *
 * @param sampleRate Sample rate in Hz; it is also embedded in the MIME type.
 * @returns A new leg object with codec, format, sampleRate and mimeType.
 */
function pcmLeg(sampleRate) {
    const mimeType = "audio/pcm;rate=".concat(sampleRate);
    return { codec: "pcm", format: "audio/pcm", sampleRate, mimeType };
}
19
/**
 * Shared descriptor for a PCMU audio leg at 8 kHz.
 * The same object instance is used for both AI legs whenever the carrier codec is "pcmu".
 */
const PCMU_LEG = { codec: "pcmu", format: "audio/pcmu", sampleRate: 8000, mimeType: "audio/pcmu" };
25
/**
 * Picks the audio legs used towards the AI provider (input and output).
 *
 * Resolution order:
 *  1. "gemini-live" always gets PCM 16 kHz in / PCM 24 kHz out.
 *  2. A "pcmu" carrier shares PCMU_LEG for both directions.
 *  3. "xai-realtime" gets PCM at 16 kHz when the carrier runs at 16 kHz, else 24 kHz.
 *  4. Anything else gets PCM 24 kHz.
 *
 * Except for gemini-live, both directions reference the same leg object.
 */
function resolveAiLegs(aiProvider, carrier) {
    if (aiProvider === "gemini-live") {
        return { aiInput: pcmLeg(16000), aiOutput: pcmLeg(24000) };
    }
    let sharedLeg;
    if (carrier.codec === "pcmu") {
        sharedLeg = PCMU_LEG;
    }
    else if (aiProvider === "xai-realtime" && carrier.carrierSampleRate === 16000) {
        sharedLeg = pcmLeg(16000);
    }
    else {
        sharedLeg = pcmLeg(24000);
    }
    return { aiInput: sharedLeg, aiOutput: sharedLeg };
}
51
/**
 * Builds the full audio route between a carrier profile and an AI provider.
 *
 * @param carrier Resolved carrier media profile (reads id, provider, codec, carrierSampleRate).
 * @param aiProviderInput Requested AI provider; normalized via normalizeAiProvider.
 * @returns Route descriptor including both AI legs, the transcoding mode per
 *          direction, a composite id and a stability flag.
 */
function createPhoneAudioRoute(carrier, aiProviderInput) {
    const aiProvider = normalizeAiProvider(aiProviderInput);
    const legs = resolveAiLegs(aiProvider, carrier);
    const aiInput = legs.aiInput;
    const aiOutput = legs.aiOutput;
    // A direction needs no transcoding only when both ends are PCMU at the same rate.
    const isPassthrough = (leg) => carrier.codec === "pcmu" && leg.codec === "pcmu" && carrier.carrierSampleRate === leg.sampleRate;
    const legTag = (leg) => `${leg.codec}-${leg.sampleRate}`;
    return {
        id: `${carrier.id}__${aiProvider}__in-${legTag(aiInput)}__out-${legTag(aiOutput)}`,
        carrierProvider: carrier.provider,
        aiProvider,
        carrier,
        aiInput,
        aiOutput,
        inputTranscoding: isPassthrough(aiInput) ? "none" : "decode-resample",
        outputTranscoding: isPassthrough(aiOutput) ? "none" : "resample-encode",
        // Every profile except "telnyx-l16-16k" is flagged as stable.
        stable: carrier.id !== "telnyx-l16-16k",
    };
}
72
/**
 * Resolves the carrier media profile from the request fields and builds the
 * audio route for the requested AI provider.
 *
 * @param input Object with carrierProvider, requestedCodec, l16ByteOrder,
 *              fallbackState and aiProvider.
 */
function resolvePhoneAudioRoute(input) {
    const profileRequest = {
        provider: input.carrierProvider,
        requestedCodec: input.requestedCodec,
        l16ByteOrder: input.l16ByteOrder,
        fallbackState: input.fallbackState,
    };
    const carrierProfile = (0, profiles_1.resolvePhoneMediaProfile)(profileRequest);
    return createPhoneAudioRoute(carrierProfile, input.aiProvider);
}
81
/**
 * Produces a metadata view of a route: every route field is copied as-is,
 * except `carrier`, which is replaced by phoneMediaProfileMetadata(route.carrier).
 */
function phoneAudioRouteMetadata(route) {
    const { id, carrierProvider, aiProvider, aiInput, aiOutput, inputTranscoding, outputTranscoding, stable } = route;
    const carrier = (0, profiles_1.phoneMediaProfileMetadata)(route.carrier);
    return {
        id,
        carrierProvider,
        aiProvider,
        carrier,
        aiInput,
        aiOutput,
        inputTranscoding,
        outputTranscoding,
        stable,
    };
}
94
/**
 * Returns the carrier profile used for a provider's stable route.
 *
 * For "telnyx" this is a copy of the "telnyx-pcmu-8k" profile with `source`
 * set to "auto"; every other provider goes through automatic codec resolution.
 */
function getStableCarrierProfile(provider) {
    if (provider !== "telnyx") {
        return (0, profiles_1.resolvePhoneMediaProfile)({ provider, requestedCodec: "auto" });
    }
    const telnyxPcmu = (0, profiles_1.getPhoneMediaProfile)("telnyx-pcmu-8k");
    return Object.assign({}, telnyxPcmu, { source: "auto" });
}
package/package.json ADDED
@@ -0,0 +1,39 @@
1
+ {
2
+ "name": "@kognitivedev/voice-media-bridge",
3
+ "version": "0.2.29",
4
+ "main": "dist/index.js",
5
+ "types": "dist/index.d.ts",
6
+ "exports": {
7
+ ".": {
8
+ "types": "./dist/index.d.ts",
9
+ "default": "./dist/index.js"
10
+ }
11
+ },
12
+ "publishConfig": {
13
+ "access": "public"
14
+ },
15
+ "scripts": {
16
+ "build": "tsc",
17
+ "dev": "tsc -w --noCheck",
18
+ "test": "vitest run",
19
+ "prepublishOnly": "npm run build"
20
+ },
21
+ "dependencies": {
22
+ "@kognitivedev/telephony": "^0.2.29"
23
+ },
24
+ "devDependencies": {
25
+ "@types/node": "^20.0.0",
26
+ "typescript": "^5.0.0",
27
+ "vitest": "^3.0.0"
28
+ },
29
+ "description": "Reusable realtime telephony media bridge helpers for Kognitive voice agents",
30
+ "keywords": [
31
+ "kognitive",
32
+ "voice",
33
+ "media",
34
+ "telnyx",
35
+ "twilio",
36
+ "realtime"
37
+ ],
38
+ "license": "MIT"
39
+ }
@@ -0,0 +1,229 @@
1
+ import { describe, expect, it } from "vitest";
2
+ import {
3
+ base64ToPcm16Le,
4
+ decodeCarrierPayload,
5
+ encodeCarrierPayload,
6
+ frameDurationMs,
7
+ fromAiOutputAudio,
8
+ fromOpenAIOutputAudio,
9
+ getPhoneMediaProfile,
10
+ isOpenAIRealtimeAudioReady,
11
+ phoneAudioRouteMetadata,
12
+ openAIRealtimeAudioFormat,
13
+ pcm16ToBase64Le,
14
+ realtimeAudioFormatForRoute,
15
+ realtimeOutputAudioFormatForRoute,
16
+ resolvePhoneAudioRoute,
17
+ resolvePhoneMediaProfile,
18
+ summarizePcm16Signal,
19
+ toAiInputAudio,
20
+ toOpenAIInputAudio,
21
+ } from "../index";
22
+
23
+ describe("voice media bridge audio profiles", () => {
24
it("round-trips PCMU within lossy telephone tolerance", () => {
  // PCMU is lossy, so each decoded sample is compared against a tolerance rather than exactly.
  const pcmuProfile = getPhoneMediaProfile("twilio-pcmu-8k");
  const original = new Int16Array([0, 256, -512, 1024, -2048, 4096, -8192, 12000]);

  const encoded = encodeCarrierPayload(pcmuProfile, original);
  const roundTripped = decodeCarrierPayload(pcmuProfile, encoded);

  expect(roundTripped.length).toBe(original.length);
  original.forEach((expected, position) => {
    expect(Math.abs((roundTripped[position] ?? 0) - expected)).toBeLessThan(900);
  });
});
36
+
37
it("preserves L16 little-endian PCM16 values and duration", () => {
  // L16 is uncompressed, so the carrier round trip must be bit-exact.
  const l16Profile = getPhoneMediaProfile("telnyx-l16-16k");
  const pcm = new Int16Array([0, 1, -1, 32767, -32768, 1600, -2400]);

  const roundTripped = decodeCarrierPayload(l16Profile, encodeCarrierPayload(l16Profile, pcm));

  expect(Array.from(roundTripped)).toEqual(Array.from(pcm));
  expect(frameDurationMs(320, l16Profile.carrierSampleRate)).toBe(20);
  expect(base64ToPcm16Le(pcm16ToBase64Le(pcm))).toEqual(pcm);
});
48
+
49
it("can force Telnyx L16 little-endian decoding for live codec experiments", () => {
  const forcedLe = resolvePhoneMediaProfile({ provider: "telnyx", requestedCodec: "l16_16k", l16ByteOrder: "le" });
  const pcm = new Int16Array([0, 1200, -2400, 4800]);

  const encoded = encodeCarrierPayload(forcedLe, pcm);

  expect(forcedLe).toMatchObject({ id: "telnyx-l16-16k", carrierByteOrder: "le", source: "forced" });
  // With little-endian forced, the carrier payload is plain LE PCM16.
  expect(base64ToPcm16Le(encoded)).toEqual(pcm);
  expect(decodeCarrierPayload(forcedLe, encoded)).toEqual(pcm);
});
67
+
68
it("resamples Telnyx L16 16k to OpenAI PCM16 24k and back without clipping", () => {
  const l16Profile = getPhoneMediaProfile("telnyx-l16-16k");
  // 100 ms of a sine wave at the 16 kHz carrier rate.
  const carrierSamples = Int16Array.from({ length: 1600 }, (_, position) => Math.round(Math.sin(position / 8) * 9000));

  const openAIPayload = toOpenAIInputAudio(l16Profile, encodeCarrierPayload(l16Profile, carrierSamples));
  const openAISamples = base64ToPcm16Le(openAIPayload);
  expect(openAISamples.length).toBe(2400);

  const backToCarrier = fromOpenAIOutputAudio(l16Profile, pcm16ToBase64Le(openAISamples));
  expect(backToCarrier.samples.length).toBe(1600);
  expect(backToCarrier.durationMs).toBe(100);
  expect(backToCarrier.signal.clippedSamples).toBe(0);
});
84
+
85
it("summarizes silence, weak input, likely speech, and clipping", () => {
  // Silence: an all-zero buffer.
  const silence = summarizePcm16Signal(new Int16Array(160));
  expect(silence).toMatchObject({ peakAbs: 0, rms: 0, clippedSamples: 0 });

  // Weak input stays below both speech thresholds (rms 400, peak 2500).
  const weak = summarizePcm16Signal(new Int16Array([10, -12, 8, -9]));
  expect(weak.rms).toBeLessThan(400);
  expect(weak.peakAbs).toBeLessThan(2500);

  // Likely speech reaches both thresholds.
  const speech = summarizePcm16Signal(new Int16Array([0, 1200, -1800, 2600, -3200]));
  expect(speech.rms).toBeGreaterThanOrEqual(400);
  expect(speech.peakAbs).toBeGreaterThanOrEqual(2500);

  // Two of three samples sit at full scale.
  const clipped = summarizePcm16Signal(new Int16Array([32767, -32768, 100]));
  expect(clipped.clippedSamples).toBe(2);
  expect(clipped.clippingRatio).toBeGreaterThan(0.6);
});
104
+
105
it("selects stable PCMU automatically and keeps Telnyx L16 explicit-only", () => {
  // Each row: [profile request, subset the resolved profile must match].
  const cases = [
    [
      { provider: "telnyx", requestedCodec: "auto" },
      { id: "telnyx-pcmu-8k", carrierCodec: "PCMU", carrierSampleRate: 8000, source: "auto" },
    ],
    [
      { provider: "telnyx", requestedCodec: "l16_16k" },
      { id: "telnyx-l16-16k", carrierCodec: "L16", carrierSampleRate: 16000, source: "forced" },
    ],
    [
      { provider: "telnyx", requestedCodec: "pcmu_8k" },
      { id: "telnyx-pcmu-8k", source: "forced" },
    ],
    [
      { provider: "telnyx", requestedCodec: "auto", fallbackState: { forcePcmu: true } },
      { id: "telnyx-pcmu-8k", source: "fallback" },
    ],
    [
      { provider: "twilio", requestedCodec: "l16_16k" },
      { id: "twilio-pcmu-8k" },
    ],
    [
      { provider: "sip", requestedCodec: "auto" },
      { id: "sip-pcmu-8k", provider: "sip", carrierCodec: "PCMU", openAIFormat: "audio/pcmu" },
    ],
  ] as const;

  for (const [request, expected] of cases) {
    expect(resolvePhoneMediaProfile(request)).toMatchObject(expected);
  }
});
136
+
137
it("resolves stable audio routes for every phone provider and AI provider", () => {
  const phoneProviders = ["twilio", "telnyx", "sip"] as const;
  const aiProviders = ["openai-realtime", "xai-realtime", "gemini-live"] as const;

  // Expected leg descriptors, shared across all provider combinations.
  const pcm16k = { codec: "pcm", format: "audio/pcm", sampleRate: 16000, mimeType: "audio/pcm;rate=16000" };
  const pcm24k = { codec: "pcm", format: "audio/pcm", sampleRate: 24000, mimeType: "audio/pcm;rate=24000" };
  const pcmu8k = { codec: "pcmu", format: "audio/pcmu", sampleRate: 8000, mimeType: "audio/pcmu" };

  for (const carrierProvider of phoneProviders) {
    for (const aiProvider of aiProviders) {
      const route = resolvePhoneAudioRoute({ carrierProvider, aiProvider, requestedCodec: "auto" });

      // Automatic codec selection always lands on the stable PCMU 8 kHz carrier profile.
      expect(route.carrier.codec).toBe("pcmu");
      expect(route.carrier.carrierSampleRate).toBe(8000);
      expect(route.stable).toBe(true);
      expect(phoneAudioRouteMetadata(route)).toMatchObject({
        carrierProvider,
        aiProvider,
        carrier: { codec: "PCMU", sampleRate: 8000 },
      });

      if (aiProvider === "gemini-live") {
        expect(route.aiInput).toMatchObject(pcm16k);
        expect(route.aiOutput).toMatchObject(pcm24k);
        expect(route.inputTranscoding).toBe("decode-resample");
        expect(route.outputTranscoding).toBe("resample-encode");
        expect(realtimeAudioFormatForRoute(route)).toEqual({ type: "audio/pcm", rate: 16000 });
        expect(realtimeOutputAudioFormatForRoute(route)).toEqual({ type: "audio/pcm", rate: 24000 });
      } else {
        expect(route.aiInput).toMatchObject(pcmu8k);
        expect(route.aiOutput).toMatchObject(pcmu8k);
        expect(route.inputTranscoding).toBe("none");
        expect(route.outputTranscoding).toBe("none");
        expect(realtimeAudioFormatForRoute(route)).toEqual({ type: "audio/pcmu" });
        expect(realtimeOutputAudioFormatForRoute(route)).toEqual({ type: "audio/pcmu" });
      }
    }
  }
});
179
+
180
it("passes PCMU through for OpenAI/xAI and transcodes Gemini through PCM16 16k", () => {
  // 100 ms of a sine wave at 8 kHz, encoded with the Telnyx PCMU profile.
  const carrierSamples = Int16Array.from({ length: 800 }, (_, position) => Math.round(Math.sin(position / 8) * 9000));
  const carrierProfile = getPhoneMediaProfile("telnyx-pcmu-8k");
  const carrierPayload = encodeCarrierPayload(carrierProfile, carrierSamples);

  const passthroughRoute = resolvePhoneAudioRoute({ carrierProvider: "telnyx", aiProvider: "openai-realtime", requestedCodec: "auto" });
  const geminiRoute = resolvePhoneAudioRoute({ carrierProvider: "telnyx", aiProvider: "gemini-live", requestedCodec: "auto" });

  // Pass-through: the very same payload string is forwarded in both directions.
  expect(toAiInputAudio(passthroughRoute, carrierPayload)).toBe(carrierPayload);
  expect(fromAiOutputAudio(passthroughRoute, carrierPayload).payload).toBe(carrierPayload);

  // Gemini input: decoded and upsampled from 8 kHz to 16 kHz.
  const geminiInput = toAiInputAudio(geminiRoute, carrierPayload);
  expect(geminiInput).not.toBe(carrierPayload);
  const geminiPcm = base64ToPcm16Le(geminiInput);
  expect(geminiPcm.length).toBe(1600);

  // Gemini output: fed back with an explicit 16 kHz source rate.
  const geminiOutput = fromAiOutputAudio(geminiRoute, pcm16ToBase64Le(geminiPcm), 16000);
  expect(geminiOutput.samples.length).toBe(800);
  expect(geminiOutput.durationMs).toBe(100);
});
201
+
202
it("configures OpenAI session audio as PCMU for Twilio and PCM for Telnyx HD", () => {
  const twilio = getPhoneMediaProfile("twilio-pcmu-8k");
  const telnyx = getPhoneMediaProfile("telnyx-l16-16k");

  expect(openAIRealtimeAudioFormat(twilio)).toEqual({ type: "audio/pcmu" });
  expect(openAIRealtimeAudioFormat(telnyx)).toEqual({ type: "audio/pcm", rate: 24000 });

  // Builds a "session.updated" event announcing the same format in both directions.
  const sessionUpdated = (format: { type: string; rate?: number }) => ({
    type: "session.updated",
    session: {
      audio: {
        input: { format: { ...format } },
        output: { format: { ...format } },
      },
    },
  });

  expect(isOpenAIRealtimeAudioReady(sessionUpdated({ type: "audio/pcm", rate: 24000 }), telnyx)).toBe(true);
  expect(isOpenAIRealtimeAudioReady(sessionUpdated({ type: "audio/pcmu" }), twilio)).toBe(true);
});
229
+ });
package/src/audio.ts ADDED
@@ -0,0 +1,182 @@
1
+ import {
2
+ decodeTwilioMulawBase64ToPcm16,
3
+ encodePcm16ToTwilioMulawBase64,
4
+ resamplePcm16WindowedSinc,
5
+ } from "@kognitivedev/telephony";
6
+ import type { PhoneMediaProfile } from "./profiles";
7
+ import type { PhoneAudioRoute } from "./routes";
8
+
9
/** Level statistics for a buffer of PCM16 samples, as produced by `summarizePcm16Signal`. */
export interface PcmSignalSummary {
  /** Number of samples analysed. */
  samples: number;
  /** Largest absolute sample value. */
  peakAbs: number;
  /** Root-mean-square level, rounded to the nearest integer. */
  rms: number;
  /** Count of samples whose absolute value reaches the clip threshold. */
  clippedSamples: number;
  /** `clippedSamples` divided by `samples` (0 for an empty buffer). */
  clippingRatio: number;
  /** Byte size associated with the buffer; defaults to two bytes per sample. */
  bytes: number;
}
17
+
18
/** Audio prepared for sending to the carrier, together with diagnostics. */
export interface CarrierAudioOutput {
  /** Base64 payload in the carrier's wire format. */
  payload: string;
  /** Duration of `samples` at the carrier sample rate, in milliseconds. */
  durationMs: number;
  /** Level statistics computed over `samples`. */
  signal: PcmSignalSummary;
  /** PCM16 samples at the carrier sample rate. */
  samples: Int16Array;
}
24
+
25
/** PCM sample rate constant (Hz) exported for the OpenAI leg. */
export const OPENAI_PCM_SAMPLE_RATE = 24000;

/** Absolute sample value at or above which a PCM16 sample is counted as clipped. */
const CLIP_THRESHOLD = 32760;
27
+
28
/**
 * Serialises PCM16 samples as little-endian bytes and returns them base64-encoded.
 *
 * @param samples Signed 16-bit samples.
 * @returns Base64 string of `samples.length * 2` bytes (empty string for no samples).
 */
export function pcm16ToBase64Le(samples: Int16Array): string {
  const bytes = Buffer.alloc(samples.length * 2);
  samples.forEach((sample, position) => {
    bytes.writeInt16LE(sample, position * 2);
  });
  return bytes.toString("base64");
}
35
+
36
/**
 * Decodes a base64 string into PCM16 samples, reading each pair of bytes as little-endian.
 *
 * A trailing odd byte is ignored.
 */
export function base64ToPcm16Le(data: string): Int16Array {
  const bytes = Buffer.from(data, "base64");
  const sampleCount = Math.floor(bytes.length / 2);
  const decoded = new Int16Array(sampleCount);
  let position = 0;
  while (position < sampleCount) {
    decoded[position] = bytes.readInt16LE(position * 2);
    position += 1;
  }
  return decoded;
}
44
+
45
/**
 * Serialises PCM16 samples as big-endian bytes and returns them base64-encoded.
 *
 * @param samples Signed 16-bit samples.
 * @returns Base64 string of `samples.length * 2` bytes (empty string for no samples).
 */
export function pcm16ToBase64Be(samples: Int16Array): string {
  const bytes = Buffer.alloc(samples.length * 2);
  samples.forEach((sample, position) => {
    bytes.writeInt16BE(sample, position * 2);
  });
  return bytes.toString("base64");
}
52
+
53
/**
 * Decodes a base64 string into PCM16 samples, reading each pair of bytes as big-endian.
 *
 * A trailing odd byte is ignored.
 */
export function base64ToPcm16Be(data: string): Int16Array {
  const bytes = Buffer.from(data, "base64");
  const sampleCount = Math.floor(bytes.length / 2);
  const decoded = new Int16Array(sampleCount);
  let position = 0;
  while (position < sampleCount) {
    decoded[position] = bytes.readInt16BE(position * 2);
    position += 1;
  }
  return decoded;
}
61
+
62
/**
 * Computes level statistics (peak, RMS, clipping) for a PCM16 buffer.
 *
 * @param samples PCM16 samples to analyse.
 * @param bytes Byte size to report in the summary; defaults to two bytes per sample.
 * @returns The summary; an empty buffer yields all-zero statistics with `bytes` passed through.
 */
export function summarizePcm16Signal(samples: Int16Array, bytes = samples.length * 2): PcmSignalSummary {
  const total = samples.length;
  if (total === 0) {
    return { samples: 0, peakAbs: 0, rms: 0, clippedSamples: 0, clippingRatio: 0, bytes };
  }

  let peakAbs = 0;
  let energy = 0;
  let clippedSamples = 0;
  for (let position = 0; position < total; position += 1) {
    const value = samples[position] ?? 0;
    const magnitude = Math.abs(value);
    peakAbs = Math.max(peakAbs, magnitude);
    if (magnitude >= CLIP_THRESHOLD) clippedSamples += 1;
    energy += value * value;
  }

  const rms = Math.round(Math.sqrt(energy / total));
  const clippingRatio = clippedSamples / total;
  return { samples: total, peakAbs, rms, clippedSamples, clippingRatio, bytes };
}
86
+
87
/**
 * Converts a sample count at a given sample rate to a duration in whole milliseconds.
 *
 * @param sampleCount Number of samples in the frame.
 * @param sampleRate Sample rate in Hz.
 * @returns The rounded duration in milliseconds, or 0 when the rate is not a
 *          positive number or either argument is not finite.
 */
export function frameDurationMs(sampleCount: number, sampleRate: number): number {
  // NaN fails every comparison, so `sampleRate <= 0` alone would let it through
  // and the function would return NaN; reject non-finite inputs explicitly.
  if (!Number.isFinite(sampleCount) || !Number.isFinite(sampleRate) || sampleRate <= 0) return 0;
  return Math.round((sampleCount / sampleRate) * 1000);
}
91
+
92
/**
 * Decodes a base64 carrier payload into PCM16 samples according to the profile.
 *
 * "pcmu" profiles are decoded with the mu-law decoder; every other codec is
 * read as raw 16-bit PCM, little-endian when `carrierByteOrder` is "le" and
 * big-endian otherwise.
 */
export function decodeCarrierPayload(profile: PhoneMediaProfile, payload: string): Int16Array {
  if (profile.codec === "pcmu") {
    return decodeTwilioMulawBase64ToPcm16(payload);
  }
  const decodeRawPcm = profile.carrierByteOrder === "le" ? base64ToPcm16Le : base64ToPcm16Be;
  return decodeRawPcm(payload);
}
96
+
97
/**
 * Encodes PCM16 samples into the base64 payload format of the carrier profile.
 *
 * "pcmu" profiles use the mu-law encoder; every other codec is written as raw
 * 16-bit PCM, little-endian when `carrierByteOrder` is "le" and big-endian otherwise.
 */
export function encodeCarrierPayload(profile: PhoneMediaProfile, samples: Int16Array): string {
  if (profile.codec === "pcmu") {
    return encodePcm16ToTwilioMulawBase64(samples);
  }
  const encodeRawPcm = profile.carrierByteOrder === "le" ? pcm16ToBase64Le : pcm16ToBase64Be;
  return encodeRawPcm(samples);
}
101
+
102
/**
 * Decodes a carrier payload and summarises its signal level.
 * The reported `bytes` is the decoded byte length of the base64 payload itself.
 */
export function summarizeCarrierPayload(profile: PhoneMediaProfile, payload: string): PcmSignalSummary {
  const payloadBytes = Buffer.byteLength(payload, "base64");
  const pcm = decodeCarrierPayload(profile, payload);
  return summarizePcm16Signal(pcm, payloadBytes);
}
106
+
107
/**
 * Converts a carrier payload into the base64 audio sent to OpenAI for this profile.
 *
 * When the profile's OpenAI format is "audio/pcmu" the payload is forwarded
 * untouched; otherwise it is decoded, resampled from the carrier rate to the
 * profile's OpenAI rate and re-encoded as little-endian PCM16.
 */
export function toOpenAIInputAudio(profile: PhoneMediaProfile, payload: string): string {
  if (profile.openAIFormat === "audio/pcmu") {
    return payload;
  }
  const { carrierSampleRate, openAISampleRate } = profile;
  const resampled = resamplePcm16WindowedSinc(decodeCarrierPayload(profile, payload), carrierSampleRate, openAISampleRate);
  return pcm16ToBase64Le(resampled);
}
113
+
114
/**
 * Converts an OpenAI audio delta into carrier-ready audio plus diagnostics.
 *
 * For "audio/pcmu" profiles the delta is forwarded as the payload and only
 * decoded for measurement. Otherwise the delta is read as little-endian
 * PCM16, resampled from the OpenAI rate to the carrier rate and encoded for
 * the carrier.
 */
export function fromOpenAIOutputAudio(profile: PhoneMediaProfile, delta: string): CarrierAudioOutput {
  let payload: string;
  let carrierPcm: Int16Array;

  if (profile.openAIFormat === "audio/pcmu") {
    payload = delta;
    carrierPcm = decodeCarrierPayload(profile, delta);
  } else {
    const openAIPcm = base64ToPcm16Le(delta);
    carrierPcm = resamplePcm16WindowedSinc(openAIPcm, profile.openAISampleRate, profile.carrierSampleRate);
    payload = encodeCarrierPayload(profile, carrierPcm);
  }

  return {
    payload,
    durationMs: frameDurationMs(carrierPcm.length, profile.carrierSampleRate),
    signal: summarizePcm16Signal(carrierPcm, Buffer.byteLength(payload, "base64")),
    samples: carrierPcm,
  };
}
135
+
136
/**
 * Converts a carrier payload into the base64 audio expected by the route's AI input leg.
 *
 * The payload is forwarded untouched when carrier and AI input are both PCMU
 * at the same rate. Otherwise it is decoded, resampled only if the rates
 * differ, and returned as little-endian PCM16.
 */
export function toAiInputAudio(route: PhoneAudioRoute, payload: string): string {
  const { carrier, aiInput } = route;
  const sameRate = carrier.carrierSampleRate === aiInput.sampleRate;

  if (carrier.codec === "pcmu" && aiInput.codec === "pcmu" && sameRate) {
    return payload;
  }

  const decoded = decodeCarrierPayload(carrier, payload);
  const forAi = sameRate
    ? decoded
    : resamplePcm16WindowedSinc(decoded, carrier.carrierSampleRate, aiInput.sampleRate);
  return pcm16ToBase64Le(forAi);
}
150
+
151
/**
 * Converts an AI output delta into carrier-ready audio plus diagnostics.
 *
 * @param route Audio route describing the carrier and AI output leg.
 * @param delta Base64 audio from the AI provider.
 * @param sourceRate Sample rate of `delta` when it is PCM; defaults to the
 *                   route's AI output rate. Not used on the PCMU pass-through path.
 * @returns Carrier payload, its duration, signal summary and PCM samples.
 */
export function fromAiOutputAudio(route: PhoneAudioRoute, delta: string, sourceRate = route.aiOutput.sampleRate): CarrierAudioOutput {
  const { carrier, aiOutput } = route;
  const carrierRate = carrier.carrierSampleRate;
  const passthrough = carrier.codec === "pcmu" && aiOutput.codec === "pcmu" && carrierRate === aiOutput.sampleRate;

  let payload: string;
  let carrierPcm: Int16Array;
  if (passthrough) {
    // Forward the delta untouched; decode only to measure it.
    payload = delta;
    carrierPcm = decodeCarrierPayload(carrier, delta);
  } else {
    const aiPcm = base64ToPcm16Le(delta);
    carrierPcm = sourceRate === carrierRate ? aiPcm : resamplePcm16WindowedSinc(aiPcm, sourceRate, carrierRate);
    payload = encodeCarrierPayload(carrier, carrierPcm);
  }

  return {
    payload,
    durationMs: frameDurationMs(carrierPcm.length, carrierRate),
    signal: summarizePcm16Signal(carrierPcm, Buffer.byteLength(payload, "base64")),
    samples: carrierPcm,
  };
}
178
+
179
/**
 * Heuristic speech detector for a carrier payload.
 *
 * @param input Optional threshold overrides; defaults are rms 400 and peakAbs 2500.
 * @returns true when the payload's RMS or its peak reaches the respective threshold.
 */
export function isLikelySpeechPayload(profile: PhoneMediaProfile, payload: string, input: { rms?: number; peakAbs?: number } = {}) {
  const { rms, peakAbs } = summarizeCarrierPayload(profile, payload);
  if (rms >= (input.rms ?? 400)) {
    return true;
  }
  return peakAbs >= (input.peakAbs ?? 2500);
}
package/src/index.ts ADDED
@@ -0,0 +1,4 @@
1
+ export * from "./audio";
2
+ export * from "./openai";
3
+ export * from "./profiles";
4
+ export * from "./routes";
package/src/openai.ts ADDED
@@ -0,0 +1,127 @@
1
+ import type { PhoneMediaProfile } from "./profiles";
2
+ import { getPhoneMediaProfile } from "./profiles";
3
+ import type { PhoneAudioRoute } from "./routes";
4
+
5
/**
 * Returns `value` as a string-keyed record when it is a non-null, non-array
 * object; otherwise returns a fresh empty object.
 */
function getRecord(value: unknown): Record<string, unknown> {
  if (!value || typeof value !== "object" || Array.isArray(value)) {
    return {};
  }
  return value as Record<string, unknown>;
}
8
+
9
/**
 * Returns the trimmed string when `value` is a string with non-whitespace
 * content; otherwise returns `fallback`.
 */
function getString(value: unknown, fallback = "") {
  if (typeof value !== "string") {
    return fallback;
  }
  const trimmed = value.trim();
  return trimmed ? trimmed : fallback;
}
12
+
13
/**
 * Normalises an audio format value from a session event.
 *
 * Strings are returned unchanged. Objects with a non-empty string `type`
 * become `{ type }`, plus `rate` when it is a truthy number. Anything else
 * yields null.
 */
function summarizeOpenAIAudioFormat(value: unknown) {
  if (typeof value === "string") return value;

  const { type: rawType, rate: rawRate } = getRecord(value);
  const type = getString(rawType, "");
  const rate = typeof rawRate === "number" ? rawRate : undefined;
  if (!type) return null;
  return { type, ...(rate ? { rate } : {}) };
}
20
+
21
/**
 * Builds the realtime session audio format object for a carrier profile:
 * `{ type: "audio/pcmu" }` for PCMU profiles, otherwise
 * `{ type: "audio/pcm", rate }` using the profile's OpenAI sample rate.
 */
export function openAIRealtimeAudioFormat(profile: PhoneMediaProfile) {
  const usesPcmu = profile.openAIFormat === "audio/pcmu";
  return usesPcmu
    ? { type: "audio/pcmu" }
    : { type: "audio/pcm", rate: profile.openAISampleRate };
}
26
+
27
/**
 * Builds the realtime session *input* audio format for a route:
 * `{ type: "audio/pcmu" }` when the AI input leg is PCMU, otherwise
 * `{ type: "audio/pcm", rate }` using the input leg's sample rate.
 */
export function realtimeAudioFormatForRoute(route: PhoneAudioRoute) {
  const inputLeg = route.aiInput;
  return inputLeg.format === "audio/pcmu"
    ? { type: "audio/pcmu" }
    : { type: "audio/pcm", rate: inputLeg.sampleRate };
}
32
+
33
/**
 * Builds the realtime session *output* audio format for a route:
 * `{ type: "audio/pcmu" }` when the AI output leg is PCMU, otherwise
 * `{ type: "audio/pcm", rate }` using the output leg's sample rate.
 */
export function realtimeOutputAudioFormatForRoute(route: PhoneAudioRoute) {
  const outputLeg = route.aiOutput;
  return outputLeg.format === "audio/pcmu"
    ? { type: "audio/pcmu" }
    : { type: "audio/pcm", rate: outputLeg.sampleRate };
}
38
+
39
/**
 * Checks whether a realtime event confirms the audio format expected for a profile.
 *
 * Only "session.updated" events qualify. Both the input and output format
 * types must equal the expected type. A rate mismatch fails the check only
 * when both the expected rate and the reported rate are present.
 *
 * @param event Raw event object received from the realtime session.
 * @param profile Carrier profile to validate against; defaults to "twilio-pcmu-8k".
 */
export function isOpenAIRealtimeAudioReady(event: unknown, profile: PhoneMediaProfile = getPhoneMediaProfile("twilio-pcmu-8k")) {
  const envelope = getRecord(event);
  if (envelope.type !== "session.updated") return false;

  const sessionAudio = getRecord(getRecord(envelope.session).audio);
  const expected = openAIRealtimeAudioFormat(profile);
  const expectedRate = "rate" in expected ? expected.rate : undefined;

  for (const direction of [sessionAudio.input, sessionAudio.output]) {
    const format = summarizeOpenAIAudioFormat(getRecord(direction).format);
    const formatRecord = getRecord(format);
    const reportedType = typeof format === "string" ? format : getString(formatRecord.type, "");
    const reportedRate = typeof formatRecord.rate === "number" ? formatRecord.rate : undefined;

    if (reportedType !== expected.type) return false;
    if (expectedRate && reportedRate && reportedRate !== expectedRate) return false;
  }
  return true;
}
61
+
62
/**
 * Checks whether a realtime event confirms the audio formats expected for a route.
 *
 * Only "session.updated" events qualify. The input format is compared with
 * the route's AI input leg and the output format with its AI output leg.
 * Types must match exactly. A rate mismatch fails the check only when the
 * expected format carries a rate and the event reports one.
 */
export function isRealtimeAudioReadyForRoute(event: unknown, route: PhoneAudioRoute) {
  const envelope = getRecord(event);
  if (envelope.type !== "session.updated") return false;

  const sessionAudio = getRecord(getRecord(envelope.session).audio);
  const directions = [
    { reported: sessionAudio.input, expected: realtimeAudioFormatForRoute(route) },
    { reported: sessionAudio.output, expected: realtimeOutputAudioFormatForRoute(route) },
  ];

  return directions.every(({ reported, expected }) => {
    const format = summarizeOpenAIAudioFormat(getRecord(reported).format);
    const formatRecord = getRecord(format);
    const reportedType = typeof format === "string" ? format : getString(formatRecord.type, "");
    const reportedRate = typeof formatRecord.rate === "number" ? formatRecord.rate : undefined;

    if (reportedType !== expected.type) return false;
    if ("rate" in expected && reportedRate && reportedRate !== expected.rate) return false;
    return true;
  });
}
83
+
84
/**
 * Builds the outbound "media" message carrying an audio payload for the profile's provider.
 *
 * - "telnyx": no stream id is included.
 * - "sip": the stream id falls back to "sip" when null.
 * - any other provider: a truthy stream id is required, otherwise null is returned.
 */
export function createCarrierMediaMessage(profile: PhoneMediaProfile, streamSid: string | null, payload: string) {
  switch (profile.provider) {
    case "telnyx":
      return { event: "media", media: { payload } };
    case "sip":
      return { event: "media", streamSid: streamSid ?? "sip", media: { payload } };
    default:
      return streamSid ? { event: "media", streamSid, media: { payload } } : null;
  }
}
98
+
99
/**
 * Builds the outbound "mark" message with the given mark name for the profile's provider.
 *
 * - "telnyx": no stream id is included.
 * - "sip": the stream id falls back to "sip" when null.
 * - any other provider: a truthy stream id is required, otherwise null is returned.
 */
export function createCarrierMarkMessage(profile: PhoneMediaProfile, streamSid: string | null, markName: string) {
  switch (profile.provider) {
    case "telnyx":
      return { event: "mark", mark: { name: markName } };
    case "sip":
      return { event: "mark", streamSid: streamSid ?? "sip", mark: { name: markName } };
    default:
      return streamSid ? { event: "mark", streamSid, mark: { name: markName } } : null;
  }
}
113
+
114
/**
 * Builds the outbound "clear" message for the profile's provider.
 *
 * - "telnyx": no stream id is included.
 * - "sip": the stream id falls back to "sip" when null.
 * - any other provider: a truthy stream id is required, otherwise null is returned.
 */
export function createCarrierClearMessage(profile: PhoneMediaProfile, streamSid: string | null) {
  switch (profile.provider) {
    case "telnyx":
      return { event: "clear" };
    case "sip":
      return { event: "clear", streamSid: streamSid ?? "sip" };
    default:
      return streamSid ? { event: "clear", streamSid } : null;
  }
}
120
+
121
/** Options accepted by `createOpenAIPhoneRealtimeBridge`. */
export interface CreateOpenAIPhoneRealtimeBridgeInput {
  /** Carrier media profile passed to the bridge. */
  profile: PhoneMediaProfile;
}
124
+
125
/**
 * Placeholder entry point: always throws, because in this release the bridge
 * is hosted by the backend-cloud phone runtime rather than by this package.
 *
 * @throws Error on every call.
 */
export function createOpenAIPhoneRealtimeBridge(_input: CreateOpenAIPhoneRealtimeBridgeInput): never {
  const reason = "createOpenAIPhoneRealtimeBridge is hosted by the backend-cloud phone runtime in this release";
  throw new Error(reason);
}