@kognitivedev/backend-cloud 0.2.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/.turbo/turbo-build.log +2 -0
  2. package/.turbo/turbo-test.log +14 -0
  3. package/CHANGELOG.md +11 -0
  4. package/README.md +88 -0
  5. package/dist/cloud-voice-parameters.d.ts +11 -0
  6. package/dist/cloud-voice-parameters.js +219 -0
  7. package/dist/cloud-voice-prompt-service.d.ts +24 -0
  8. package/dist/cloud-voice-prompt-service.js +382 -0
  9. package/dist/cloud-voice-runtime-service.d.ts +73 -0
  10. package/dist/cloud-voice-runtime-service.js +443 -0
  11. package/dist/cloud-voice.d.ts +36 -0
  12. package/dist/cloud-voice.js +683 -0
  13. package/dist/index.d.ts +10 -0
  14. package/dist/index.js +26 -0
  15. package/dist/phone-control.d.ts +50 -0
  16. package/dist/phone-control.js +97 -0
  17. package/dist/phone-runtime/audio-playout-tracker.d.ts +51 -0
  18. package/dist/phone-runtime/audio-playout-tracker.js +93 -0
  19. package/dist/phone-runtime/openai-twilio-realtime.d.ts +95 -0
  20. package/dist/phone-runtime/openai-twilio-realtime.js +1074 -0
  21. package/dist/tools.d.ts +2 -0
  22. package/dist/tools.js +216 -0
  23. package/dist/types.d.ts +468 -0
  24. package/dist/types.js +2 -0
  25. package/dist/utils.d.ts +3 -0
  26. package/dist/utils.js +14 -0
  27. package/package.json +47 -0
  28. package/src/__tests__/audio-playout-tracker.test.ts +46 -0
  29. package/src/__tests__/cloud-voice.test.ts +1006 -0
  30. package/src/__tests__/openai-twilio-realtime.test.ts +1193 -0
  31. package/src/__tests__/phone-control.test.ts +105 -0
  32. package/src/cloud-voice-parameters.ts +236 -0
  33. package/src/cloud-voice-prompt-service.ts +493 -0
  34. package/src/cloud-voice-runtime-service.ts +465 -0
  35. package/src/cloud-voice.ts +831 -0
  36. package/src/index.ts +10 -0
  37. package/src/phone-control.ts +156 -0
  38. package/src/phone-runtime/audio-playout-tracker.ts +132 -0
  39. package/src/phone-runtime/openai-twilio-realtime.ts +1250 -0
  40. package/src/tools.ts +227 -0
  41. package/src/types.ts +529 -0
  42. package/src/utils.ts +11 -0
  43. package/tsconfig.json +13 -0
@@ -0,0 +1,465 @@
1
+ import type {
2
+ CloudVoiceAgentConfig,
3
+ CloudVoiceChannel,
4
+ CloudVoiceConversationProfile,
5
+ CloudVoiceHumanizationConfig,
6
+ CloudVoicePipelineConfig,
7
+ CloudVoiceProvider,
8
+ CloudVoiceRuntimeCapabilities,
9
+ CloudVoiceSpeechConfig,
10
+ } from "./types";
11
+ import { getRecord, getString } from "./utils";
12
+
/**
 * Baseline humanization settings.
 *
 * The normalizers in this module fall back to these values whenever an
 * incoming field is missing or is not one of the accepted literals.
 */
export const DEFAULT_CLOUD_VOICE_HUMANIZATION: Required<CloudVoiceHumanizationConfig> = {
  enabled: true,

  // Opening behaviour.
  openingMode: "auto",
  openingStyle: "brief",

  // Conversational texture.
  fillerStyle: "light",
  backchannelFrequency: "low",
  disfluency: "rare",

  // Milliseconds, going by the `Ms` suffix.
  toolLatencyFillerMs: 700,

  // Defaults reused by normalizeCloudVoiceConversationProfile.
  conversationProfile: {
    personality: "warm",
    tone: "professional",
    pacing: "concise",
    unclearAudio: "ask_repeat",
    confirmation: "critical_fields",
    escalation: "when_blocked",
    numberReadback: true,
  },
};
31
+
/** Cloud voice config version; `as const` keeps the type as the literal `2`. */
export const CLOUD_VOICE_CONFIG_VERSION = 2 as const;

/** Identifier string for the prompt compiler revision. */
export const CLOUD_VOICE_PROMPT_COMPILER_VERSION = "cloud-voice-provider-native-v2";
34
+
/** Flags that every provider entry below sets to `true`. */
const SHARED_PROVIDER_CAPABILITIES = {
  supportsToolCalling: true,
  supportsOutputAudioTranscripts: true,
} as const;

/**
 * Static capability matrix keyed by voice provider.
 *
 * Each entry spreads the shared flags and then lists the provider-specific
 * ones; optional flags that are absent from an entry are simply left unset.
 */
export const CLOUD_VOICE_PROVIDER_CAPABILITIES: Record<CloudVoiceProvider, CloudVoiceRuntimeCapabilities> = {
  "openai-realtime": {
    ...SHARED_PROVIDER_CAPABILITIES,
    canUpdateInstructionsLive: true,
    supportsSessionResume: false,
    supportsServerVadConfig: true,
    supportsSemanticVad: true,
  },
  "gemini-live": {
    ...SHARED_PROVIDER_CAPABILITIES,
    canUpdateInstructionsLive: false,
    supportsSessionResume: true,
    supportsServerVadConfig: true,
    supportsNativeAudioOptions: true,
    supportsAffectiveDialog: true,
    supportsProactiveAudio: true,
  },
  "kognitive-voice": {
    ...SHARED_PROVIDER_CAPABILITIES,
    canUpdateInstructionsLive: true,
    supportsSessionResume: true,
    supportsServerVadConfig: false,
    supportsPipelineEotConfig: true,
    supportsCartesiaTtsControls: true,
  },
  "xai-realtime": {
    ...SHARED_PROVIDER_CAPABILITIES,
    canUpdateInstructionsLive: true,
    supportsSessionResume: false,
    supportsServerVadConfig: true,
  },
};
71
+
/**
 * Fallback pipeline used by normalizeCloudVoicePipelineConfig and
 * compileCartesiaTtsOptions when a pipeline field is not supplied.
 */
const DEFAULT_KOGNITIVE_PIPELINE: CloudVoicePipelineConfig = {
  transport: {
    type: "websocket",
    provider: "kognitive-websocket",
  },
  stt: {
    provider: "deepgram",
    model: "nova-3",
    language: "en",
  },
  llm: {
    provider: "openai",
    model: "gpt-4o-mini",
  },
  tts: {
    provider: "cartesia",
    model: "sonic-3",
    // Default voice identifier.
    voice: "a0e99841-438c-4a64-b679-ae501e7d6091",
  },
  turn: {
    interruptResponse: true,
    createResponse: true,
    prefixPaddingMs: 300,
    silenceDurationMs: 650,
  },
};
84
+
/**
 * Maps a language code or language-region tag to the language name that
 * toLanguageInstruction returns. Built from name → codes groups so each
 * language name is written once; insertion order follows the group order.
 */
const SPEECH_LANGUAGE_INSTRUCTIONS: Record<string, string> = (() => {
  const groups: ReadonlyArray<readonly [string, readonly string[]]> = [
    ["English", ["en", "en-US", "en-GB"]],
    ["Turkish", ["tr", "tr-TR"]],
    ["German", ["de", "de-DE"]],
    ["French", ["fr", "fr-FR"]],
    ["Spanish", ["es", "es-ES"]],
    ["Italian", ["it", "it-IT"]],
    ["Portuguese", ["pt", "pt-PT", "pt-BR"]],
    ["Dutch", ["nl", "nl-NL"]],
    ["Swedish", ["sv", "sv-SE"]],
    ["Norwegian", ["no", "nb-NO"]],
    ["Danish", ["da", "da-DK"]],
    ["Finnish", ["fi", "fi-FI"]],
    ["Polish", ["pl", "pl-PL"]],
    ["Czech", ["cs", "cs-CZ"]],
    ["Romanian", ["ro", "ro-RO"]],
    ["Greek", ["el", "el-GR"]],
    ["Russian", ["ru", "ru-RU"]],
    ["Ukrainian", ["uk", "uk-UA"]],
  ];
  const table: Record<string, string> = {};
  for (const [name, codes] of groups) {
    for (const code of codes) table[code] = name;
  }
  return table;
})();
125
+
/**
 * Maps a bare language code to the language-region tag used by
 * resolveCloudVoiceSpeechLanguageCode. Note that `no` maps to `nb-NO`.
 */
const SPEECH_LANGUAGE_CODES: Record<string, string> = Object.fromEntries<string>([
  ["en", "en-US"],
  ["tr", "tr-TR"],
  ["de", "de-DE"],
  ["fr", "fr-FR"],
  ["es", "es-ES"],
  ["it", "it-IT"],
  ["pt", "pt-PT"],
  ["nl", "nl-NL"],
  ["sv", "sv-SE"],
  ["no", "nb-NO"],
  ["da", "da-DK"],
  ["fi", "fi-FI"],
  ["pl", "pl-PL"],
  ["cs", "cs-CZ"],
  ["ro", "ro-RO"],
  ["el", "el-GR"],
  ["ru", "ru-RU"],
  ["uk", "uk-UA"],
]);
146
+
/**
 * Maps a lower-cased accent label to a language-region tag.
 * resolveCloudVoiceSpeechLanguageCode lower-cases the accent before the
 * lookup, so every key here is lower case.
 */
const SPEECH_ACCENT_LANGUAGE_CODES: Record<string, string> = Object.fromEntries<string>([
  // English
  ["neutral english", "en-US"],
  ["british english", "en-GB"],
  ["irish english", "en-IE"],
  ["scottish english", "en-GB"],
  // Turkish
  ["standard istanbul turkish", "tr-TR"],
  ["aegean turkish", "tr-TR"],
  ["anatolian turkish", "tr-TR"],
  // German
  ["standard german", "de-DE"],
  ["austrian german", "de-AT"],
  ["swiss german influenced standard german", "de-CH"],
  // French
  ["standard french from france", "fr-FR"],
  ["belgian french", "fr-BE"],
  ["swiss french", "fr-CH"],
  // Spanish
  ["castilian spanish", "es-ES"],
  ["andalusian spanish", "es-ES"],
  ["canarian spanish", "es-ES"],
]);
165
+
/**
 * Reports whether the channel is one of the telephony channels
 * ("phone", "sip" or "outbound"). An omitted channel yields `false`.
 */
function isPhoneChannel(channel?: CloudVoiceChannel) {
  switch (channel) {
    case "phone":
    case "sip":
    case "outbound":
      return true;
    default:
      return false;
  }
}
169
+
/**
 * Coerces an arbitrary value into a complete humanization config.
 *
 * Each enumerated field keeps the incoming value only when it is one of the
 * accepted literals; otherwise the matching DEFAULT_CLOUD_VOICE_HUMANIZATION
 * value is used. `toolLatencyFillerMs` accepts finite numbers only, rounded
 * and clamped to be non-negative.
 *
 * @param value Untrusted input; it is passed through `getRecord` before any field is read.
 * @returns A config with every field populated.
 */
export function normalizeCloudVoiceHumanizationConfig(value: unknown): Required<CloudVoiceHumanizationConfig> {
  const source = getRecord(value);
  const fallback = DEFAULT_CLOUD_VOICE_HUMANIZATION;

  // Keeps `candidate` when it is one of `allowed`, else returns `otherwise`.
  const oneOf = <T extends string>(candidate: unknown, allowed: readonly T[], otherwise: T): T =>
    (allowed as readonly unknown[]).includes(candidate) ? (candidate as T) : otherwise;

  const rawLatency = source.toolLatencyFillerMs;
  let toolLatencyFillerMs = fallback.toolLatencyFillerMs;
  if (typeof rawLatency === "number" && Number.isFinite(rawLatency)) {
    toolLatencyFillerMs = Math.max(0, Math.round(rawLatency));
  }

  return {
    enabled: typeof source.enabled === "boolean" ? source.enabled : fallback.enabled,
    openingMode: oneOf(source.openingMode, ["wait", "auto"], fallback.openingMode),
    openingStyle: oneOf(source.openingStyle, ["warm", "professional", "brief"], fallback.openingStyle),
    fillerStyle: oneOf(source.fillerStyle, ["off", "natural", "light"], fallback.fillerStyle),
    backchannelFrequency: oneOf(source.backchannelFrequency, ["off", "medium", "low"], fallback.backchannelFrequency),
    disfluency: oneOf(source.disfluency, ["off", "rare"], fallback.disfluency),
    toolLatencyFillerMs,
    // `profile` is accepted as an alternative key; the whole record is passed
    // along so the profile normalizer can read the legacy style fields.
    conversationProfile: normalizeCloudVoiceConversationProfile(source.conversationProfile ?? source.profile, source),
  };
}
201
+
/**
 * Coerces an arbitrary value into a complete conversation profile.
 *
 * Fields holding an accepted literal are kept. Otherwise `personality`, `tone`
 * and `pacing` are derived from the legacy `openingStyle`, `fillerStyle` and
 * `backchannelFrequency` fields before falling back; the remaining fields fall
 * back directly to the default profile.
 *
 * @param value Untrusted profile input; passed through `getRecord`.
 * @param legacy Record that may carry the legacy style fields.
 * @returns A profile with every field populated.
 */
export function normalizeCloudVoiceConversationProfile(value: unknown, legacy: Record<string, unknown> = {}): CloudVoiceConversationProfile {
  const profile = getRecord(value);
  const legacyOpening = getString(legacy.openingStyle, "");
  const legacyFiller = getString(legacy.fillerStyle, "");
  const legacyBackchannel = getString(legacy.backchannelFrequency, "");
  const base = DEFAULT_CLOUD_VOICE_HUMANIZATION.conversationProfile;

  // Returns `candidate` when it is one of `allowed`, else undefined.
  const oneOf = <T extends string>(candidate: unknown, allowed: readonly T[]): T | undefined =>
    (allowed as readonly unknown[]).includes(candidate) ? (candidate as T) : undefined;

  const personality: CloudVoiceConversationProfile["personality"] =
    oneOf(profile.personality, ["neutral", "warm", "expert", "concierge"])
    ?? (legacyOpening === "professional" ? "expert" : "warm");

  const prefersMeasuredPacing = legacyFiller === "natural" || legacyBackchannel === "medium";

  return {
    personality,
    tone:
      oneOf(profile.tone, ["casual", "professional", "empathetic", "polished"])
      ?? (legacyOpening === "warm" ? "empathetic" : base.tone),
    pacing:
      oneOf(profile.pacing, ["concise", "measured", "deliberate", "energetic"])
      ?? (prefersMeasuredPacing ? "measured" : base.pacing),
    unclearAudio: oneOf(profile.unclearAudio, ["ask_repeat", "confirm_best_guess"]) ?? base.unclearAudio,
    confirmation: oneOf(profile.confirmation, ["critical_fields", "all_actions", "minimal"]) ?? base.confirmation,
    escalation: oneOf(profile.escalation, ["when_blocked", "on_request", "never"]) ?? base.escalation,
    numberReadback: typeof profile.numberReadback === "boolean" ? profile.numberReadback : base.numberReadback,
  };
}
237
+
/**
 * Looks up the capability flags for a provider.
 *
 * @param provider Provider key into CLOUD_VOICE_PROVIDER_CAPABILITIES.
 * @returns The provider's entry, or the "openai-realtime" entry when the lookup yields null/undefined.
 */
export function getCloudVoiceProviderCapabilities(provider: CloudVoiceProvider): CloudVoiceRuntimeCapabilities {
  const known = CLOUD_VOICE_PROVIDER_CAPABILITIES[provider];
  if (known != null) return known;
  return CLOUD_VOICE_PROVIDER_CAPABILITIES["openai-realtime"];
}
241
+
/**
 * Extracts the speech settings stored under `config.metadata.speech`.
 *
 * The five known text fields are kept only when they are non-blank strings
 * (trimmed). Any other key is copied through unchanged unless its value is
 * `undefined`.
 *
 * @returns The normalized settings, or `undefined` when nothing was kept.
 */
export function normalizeSpeechConfig(config: Pick<CloudVoiceAgentConfig, "metadata">): CloudVoiceSpeechConfig | undefined {
  const rawSpeech = getRecord(getRecord(config.metadata).speech);
  const result: CloudVoiceSpeechConfig = {};
  const textFields = ["language", "accent", "style", "pace", "emotion"] as const;

  // Known text fields first, so they lead the key order of the result.
  for (const field of textFields) {
    const raw = rawSpeech[field];
    if (typeof raw !== "string") continue;
    if (raw.trim()) result[field] = raw.trim();
  }

  // Pass every other defined entry through untouched.
  for (const [extraKey, extraValue] of Object.entries(rawSpeech)) {
    const isTextField = (textFields as readonly string[]).includes(extraKey);
    if (!isTextField && extraValue !== undefined) {
      result[extraKey] = extraValue;
    }
  }

  return Object.keys(result).length === 0 ? undefined : result;
}
256
+
/**
 * Converts a language code or tag into the language name used in instructions.
 *
 * Lookup order: the value as given, its lower-cased form, then (for
 * `ll-RR` shaped tags) the canonical `ll-RR` casing so that `en-us` or
 * `EN-US` resolve like `en-US`. Only the table's own keys are consulted, so
 * inputs such as "constructor" are returned unchanged instead of resolving to
 * an inherited Object.prototype member.
 *
 * @param value Language code, tag or free-form name; may be undefined.
 * @returns The mapped language name, the input itself when no mapping exists,
 *          or `undefined` for an empty/undefined input.
 */
export function toLanguageInstruction(value: string | undefined) {
  if (!value) return undefined;

  // Own-property lookup: never resolve keys inherited from Object.prototype.
  const lookup = (key: string): string | undefined =>
    Object.prototype.hasOwnProperty.call(SPEECH_LANGUAGE_INSTRUCTIONS, key)
      ? SPEECH_LANGUAGE_INSTRUCTIONS[key]
      : undefined;

  // Table keys use lower-case language + upper-case region casing.
  const tagParts = /^([a-z]{2,3})-([a-z]{2})$/i.exec(value);
  let canonicalTag: string | undefined;
  if (tagParts) {
    const [language = "", region = ""] = tagParts.slice(1);
    canonicalTag = `${language.toLowerCase()}-${region.toUpperCase()}`;
  }

  return lookup(value)
    ?? lookup(value.toLowerCase())
    ?? (canonicalTag ? lookup(canonicalTag) : undefined)
    ?? value;
}
260
+
/**
 * Resolves the language-region tag for the configured speech settings.
 *
 * Precedence: a recognised accent label (matched case-insensitively) wins;
 * otherwise a language that already looks like `ll-RR` is returned as given;
 * otherwise the bare language code is mapped through SPEECH_LANGUAGE_CODES,
 * falling back to the language string itself.
 *
 * Table lookups consult own keys only, so a value such as "constructor" is
 * treated as unmapped rather than resolving to an inherited
 * Object.prototype member (which would return a non-string).
 *
 * @returns The resolved tag, or `undefined` when no language is configured.
 */
export function resolveCloudVoiceSpeechLanguageCode(config: Pick<CloudVoiceAgentConfig, "metadata">) {
  const ownEntry = (table: Record<string, string>, key: string): string | undefined =>
    Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;

  const speech = normalizeSpeechConfig(config);
  const accentCode = speech?.accent
    ? ownEntry(SPEECH_ACCENT_LANGUAGE_CODES, speech.accent.toLowerCase())
    : undefined;
  if (accentCode) return accentCode;

  const language = speech?.language;
  if (!language) return undefined;
  // Already a language-region tag (any casing): pass it through unchanged.
  if (/^[a-z]{2,3}-[A-Z]{2}$/i.test(language)) return language;
  return ownEntry(SPEECH_LANGUAGE_CODES, language)
    ?? ownEntry(SPEECH_LANGUAGE_CODES, language.toLowerCase())
    ?? language;
}
270
+
/**
 * Derives the transcription language from the speech settings: the text
 * before the first "-" of the configured language, or the whole string when
 * it contains no "-".
 *
 * @returns The language portion, or `undefined` when no language is configured.
 */
function resolveCloudVoiceTranscriptionLanguage(config: Pick<CloudVoiceAgentConfig, "metadata">) {
  const language = normalizeSpeechConfig(config)?.language;
  if (!language) return undefined;
  const dashIndex = language.indexOf("-");
  return dashIndex === -1 ? language : language.slice(0, dashIndex);
}
277
+
/**
 * Resolves the pipeline transport.
 *
 * The type comes from `pipeline.transport.type` when it is "webrtc" or
 * "websocket", else from `config.transport`. The provider comes from
 * `pipeline.transport.provider` when it is a recognised value; otherwise it is
 * implied by the type ("daily" for webrtc, "kognitive-websocket" for anything else).
 */
function normalizePipelineTransport(config: CloudVoiceAgentConfig): CloudVoicePipelineConfig["transport"] {
  const requested = getRecord(config.pipeline?.transport);
  const type = requested.type === "webrtc" || requested.type === "websocket"
    ? requested.type
    : config.transport;

  if (requested.provider === "daily" || requested.provider === "kognitive-websocket") {
    return { type, provider: requested.provider };
  }
  return { type, provider: type === "webrtc" ? "daily" : "kognitive-websocket" };
}
290
+
/**
 * Builds a complete pipeline config from the agent config, filling gaps from
 * DEFAULT_KOGNITIVE_PIPELINE.
 *
 * - STT language: an explicit non-blank `pipeline.stt.language` wins, else the
 *   language derived from the speech metadata; omitted when neither exists.
 * - LLM model: `pipeline.llm.model`, else `config.model`, else the default.
 * - TTS voice: `pipeline.tts.voice`, else `config.voice` unless it is unset or
 *   "alloy", else the default voice.
 * - `turn` is always a fresh object (defaults overlaid with any overrides), so
 *   callers can mutate the result without altering the module-level defaults.
 * - `tts.options` and `backgroundAudio` are included only when non-empty.
 */
export function normalizeCloudVoicePipelineConfig(config: CloudVoiceAgentConfig): CloudVoicePipelineConfig {
  const pipeline = getRecord(config.pipeline);
  const stt = getRecord(pipeline.stt);
  const llm = getRecord(pipeline.llm);
  const tts = getRecord(pipeline.tts);
  const ttsOptions = getRecord(tts.options);
  const turn = getRecord(pipeline.turn);
  const backgroundAudio = getRecord(pipeline.backgroundAudio);
  const sttLanguage = typeof stt.language === "string" && stt.language.trim()
    ? stt.language.trim()
    : resolveCloudVoiceTranscriptionLanguage(config);

  return {
    transport: normalizePipelineTransport(config),
    stt: {
      provider: getString(stt.provider, DEFAULT_KOGNITIVE_PIPELINE.stt.provider),
      model: getString(stt.model, DEFAULT_KOGNITIVE_PIPELINE.stt.model),
      ...(sttLanguage ? { language: sttLanguage } : {}),
    },
    llm: {
      provider: getString(llm.provider, DEFAULT_KOGNITIVE_PIPELINE.llm.provider),
      model: getString(llm.model, config.model || DEFAULT_KOGNITIVE_PIPELINE.llm.model),
    },
    tts: {
      provider: getString(tts.provider, DEFAULT_KOGNITIVE_PIPELINE.tts.provider),
      model: getString(tts.model, DEFAULT_KOGNITIVE_PIPELINE.tts.model),
      voice: getString(tts.voice, config.voice && config.voice !== "alloy" ? config.voice : DEFAULT_KOGNITIVE_PIPELINE.tts.voice),
      ...(Object.keys(ttsOptions).length > 0 ? { options: ttsOptions } : {}),
    },
    // Copy rather than alias the shared default; with no overrides the spread
    // of the empty record leaves a plain clone of the defaults.
    turn: { ...DEFAULT_KOGNITIVE_PIPELINE.turn, ...turn },
    ...(Object.keys(backgroundAudio).length > 0 ? { backgroundAudio } : {}),
  };
}
325
+
/**
 * Prepares the transcription settings for a session.
 *
 * @returns `null` when transcription is explicitly `null`; otherwise the
 *          transcription record, with `language` overridden by the language
 *          derived from the speech metadata when one is configured.
 */
export function toPreparedTranscription(config: Pick<CloudVoiceAgentConfig, "metadata" | "transcription">) {
  if (config.transcription === null) return null;

  const base = getRecord(config.transcription);
  const language = resolveCloudVoiceTranscriptionLanguage(config);
  if (!language) return base;
  return { ...base, language };
}
334
+
/**
 * Converts a turn-detection record into the snake_case shape built here for
 * OpenAI.
 *
 * For each aliased option the snake_case key takes precedence over its
 * camelCase twin, and a value is copied only when it has the expected
 * primitive type. `threshold` (number) and `eagerness` (string) are copied
 * under their own names.
 *
 * @returns `null` for a `null` input, `undefined` when no string `type` is
 *          present, otherwise the converted record.
 */
export function toOpenAITurnDetection(value: unknown) {
  if (value === null) return null;

  const source = getRecord(value);
  const type = typeof source.type === "string" ? source.type : "";
  if (!type) return undefined;

  const config: Record<string, unknown> = { type };

  // [output key, camelCase alias, required typeof]
  const aliasedOptions = [
    ["create_response", "createResponse", "boolean"],
    ["interrupt_response", "interruptResponse", "boolean"],
    ["prefix_padding_ms", "prefixPaddingMs", "number"],
    ["silence_duration_ms", "silenceDurationMs", "number"],
  ] as const;

  for (const [wireKey, camelKey, expectedType] of aliasedOptions) {
    // Snake_case is listed first so it wins when both spellings are valid.
    const chosen = [source[wireKey], source[camelKey]].find((candidate) => typeof candidate === expectedType);
    if (chosen !== undefined) config[wireKey] = chosen;
  }

  if (typeof source.threshold === "number") config.threshold = source.threshold;
  if (typeof source.eagerness === "string") config.eagerness = source.eagerness;

  return config;
}
370
+
/**
 * Produces provider-specific turn-detection settings with defaults filled in.
 *
 * - `null` input returns `null`.
 * - "openai-realtime" on a non-phone channel: type defaults to "semantic_vad";
 *   "off" returns `null`; "semantic_vad" returns the semantic VAD shape; any
 *   other type falls through to the generic shape at the end.
 * - "gemini-live": always the "server_vad" shape with sensitivity settings.
 * - "kognitive-voice": always the "flux_eot" shape with end-of-turn thresholds.
 * - Everything else: the generic shape, type defaulting to "server_vad".
 */
export function normalizeCloudVoiceTurnDetection(
  provider: CloudVoiceProvider,
  channel: CloudVoiceChannel,
  value: unknown,
) {
  if (value === null) return null;

  const record = getRecord(value);
  const boolOr = (candidate: unknown, fallback: boolean): boolean =>
    typeof candidate === "boolean" ? candidate : fallback;
  const numOr = (candidate: unknown, fallback: number): number =>
    typeof candidate === "number" ? candidate : fallback;

  // Shared by every shape except the kognitive-voice one.
  const createResponse = boolOr(record.createResponse, true);
  const interruptResponse = boolOr(record.interruptResponse, true);

  if (provider === "openai-realtime" && !isPhoneChannel(channel)) {
    const type = getString(record.type, "semantic_vad");
    if (type === "off") return null;
    if (type === "semantic_vad") {
      return {
        type: "semantic_vad",
        createResponse,
        interruptResponse,
        eagerness: getString(record.eagerness, "low") || "low",
      };
    }
  }

  if (provider === "gemini-live") {
    return {
      type: "server_vad",
      createResponse,
      interruptResponse,
      prefixPaddingMs: numOr(record.prefixPaddingMs, 120),
      silenceDurationMs: numOr(record.silenceDurationMs, 700),
      startOfSpeechSensitivity: getString(record.startOfSpeechSensitivity, "START_SENSITIVITY_LOW"),
      endOfSpeechSensitivity: getString(record.endOfSpeechSensitivity, "END_SENSITIVITY_LOW"),
      disabled: boolOr(record.disabled, false),
    };
  }

  if (provider === "kognitive-voice") {
    return {
      type: "flux_eot",
      eager_eot_threshold: numOr(record.eager_eot_threshold, 0.85),
      eot_threshold: numOr(record.eot_threshold, 0.65),
      eot_timeout_ms: numOr(record.eot_timeout_ms, 900),
    };
  }

  return {
    type: getString(record.type, "server_vad") || "server_vad",
    createResponse,
    interruptResponse,
    threshold: numOr(record.threshold, 0.6),
    prefixPaddingMs: numOr(record.prefixPaddingMs, 300),
    silenceDurationMs: numOr(record.silenceDurationMs, 700),
  };
}
420
+
/**
 * Resolves the input noise-reduction setting.
 *
 * @returns `null` for a `null` input; the supplied record when it has at least
 *          one key; `{ type: "near_field" }` for "openai-realtime" on a phone
 *          channel; otherwise `null`.
 */
export function normalizeCloudVoiceInputNoiseReduction(
  provider: CloudVoiceProvider,
  channel: CloudVoiceChannel,
  value: unknown,
) {
  if (value === null) return null;

  const explicit = getRecord(value);
  const hasExplicitSettings = Object.keys(explicit).length > 0;
  if (hasExplicitSettings) return explicit;

  return provider === "openai-realtime" && isPhoneChannel(channel)
    ? { type: "near_field" }
    : null;
}
436
+
/**
 * Assembles the Cartesia TTS options record from the agent config.
 *
 * Sources, in order of precedence:
 * - pipeline: `config.pipeline`, else `providerOptions.pipeline`;
 * - Cartesia settings: `tts.options`, else `tts.providerOptions`, else `providerOptions.cartesia`;
 * - speed: an explicit numeric or "slow"/"normal"/"fast" Cartesia speed, else
 *   inferred from the speech `pace` text, else "normal";
 * - emotion: the speech `emotion`, wrapped in a single-element array.
 *
 * `pronunciationDictId` and `contextMode` are included only when valid.
 */
export function compileCartesiaTtsOptions(config: Pick<CloudVoiceAgentConfig, "voice" | "metadata" | "providerOptions" | "pipeline">) {
  const providerOptions = getRecord(config.providerOptions);
  const tts = getRecord(getRecord(config.pipeline ?? providerOptions.pipeline).tts);
  const cartesia = getRecord(tts.options ?? tts.providerOptions ?? providerOptions.cartesia);
  const speech = normalizeSpeechConfig(config);

  // "alloy" is not used as a Cartesia voice; the pipeline default is used instead.
  const fallbackVoice = config.voice && config.voice !== "alloy"
    ? config.voice
    : DEFAULT_KOGNITIVE_PIPELINE.tts.voice;

  const options: Record<string, unknown> = {
    modelId: getString(tts.model, getString(cartesia.modelId, "sonic-3")),
    voice: getString(tts.voice, fallbackVoice),
  };

  const pace = getString(speech?.pace, "").toLowerCase();
  const explicitSpeed = cartesia.speed;
  if (typeof explicitSpeed === "number" || explicitSpeed === "slow" || explicitSpeed === "normal" || explicitSpeed === "fast") {
    options.speed = explicitSpeed;
  } else if (pace.includes("slow")) {
    options.speed = "slow";
  } else if (pace.includes("fast") || pace.includes("energetic")) {
    options.speed = "fast";
  } else {
    options.speed = "normal";
  }

  const emotion = getString(speech?.emotion, "");
  if (emotion) options.emotion = [emotion];

  const dictId = cartesia.pronunciationDictId;
  if (typeof dictId === "string" && dictId.trim()) {
    options.pronunciationDictId = dictId.trim();
  }

  const contextMode = cartesia.contextMode;
  if (contextMode === "reset" || contextMode === "continue") {
    options.contextMode = contextMode;
  }

  return options;
}