@kognitivedev/backend-cloud 0.2.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +2 -0
- package/.turbo/turbo-test.log +14 -0
- package/CHANGELOG.md +11 -0
- package/README.md +88 -0
- package/dist/cloud-voice-parameters.d.ts +11 -0
- package/dist/cloud-voice-parameters.js +219 -0
- package/dist/cloud-voice-prompt-service.d.ts +24 -0
- package/dist/cloud-voice-prompt-service.js +382 -0
- package/dist/cloud-voice-runtime-service.d.ts +73 -0
- package/dist/cloud-voice-runtime-service.js +443 -0
- package/dist/cloud-voice.d.ts +36 -0
- package/dist/cloud-voice.js +683 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +26 -0
- package/dist/phone-control.d.ts +50 -0
- package/dist/phone-control.js +97 -0
- package/dist/phone-runtime/audio-playout-tracker.d.ts +51 -0
- package/dist/phone-runtime/audio-playout-tracker.js +93 -0
- package/dist/phone-runtime/openai-twilio-realtime.d.ts +95 -0
- package/dist/phone-runtime/openai-twilio-realtime.js +1074 -0
- package/dist/tools.d.ts +2 -0
- package/dist/tools.js +216 -0
- package/dist/types.d.ts +468 -0
- package/dist/types.js +2 -0
- package/dist/utils.d.ts +3 -0
- package/dist/utils.js +14 -0
- package/package.json +47 -0
- package/src/__tests__/audio-playout-tracker.test.ts +46 -0
- package/src/__tests__/cloud-voice.test.ts +1006 -0
- package/src/__tests__/openai-twilio-realtime.test.ts +1193 -0
- package/src/__tests__/phone-control.test.ts +105 -0
- package/src/cloud-voice-parameters.ts +236 -0
- package/src/cloud-voice-prompt-service.ts +493 -0
- package/src/cloud-voice-runtime-service.ts +465 -0
- package/src/cloud-voice.ts +831 -0
- package/src/index.ts +10 -0
- package/src/phone-control.ts +156 -0
- package/src/phone-runtime/audio-playout-tracker.ts +132 -0
- package/src/phone-runtime/openai-twilio-realtime.ts +1250 -0
- package/src/tools.ts +227 -0
- package/src/types.ts +529 -0
- package/src/utils.ts +11 -0
- package/tsconfig.json +13 -0
|
@@ -0,0 +1,443 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
3
|
+
exports.CLOUD_VOICE_PROVIDER_CAPABILITIES = exports.CLOUD_VOICE_PROMPT_COMPILER_VERSION = exports.CLOUD_VOICE_CONFIG_VERSION = exports.DEFAULT_CLOUD_VOICE_HUMANIZATION = void 0;
|
|
4
|
+
exports.normalizeCloudVoiceHumanizationConfig = normalizeCloudVoiceHumanizationConfig;
|
|
5
|
+
exports.normalizeCloudVoiceConversationProfile = normalizeCloudVoiceConversationProfile;
|
|
6
|
+
exports.getCloudVoiceProviderCapabilities = getCloudVoiceProviderCapabilities;
|
|
7
|
+
exports.normalizeSpeechConfig = normalizeSpeechConfig;
|
|
8
|
+
exports.toLanguageInstruction = toLanguageInstruction;
|
|
9
|
+
exports.resolveCloudVoiceSpeechLanguageCode = resolveCloudVoiceSpeechLanguageCode;
|
|
10
|
+
exports.normalizeCloudVoicePipelineConfig = normalizeCloudVoicePipelineConfig;
|
|
11
|
+
exports.toPreparedTranscription = toPreparedTranscription;
|
|
12
|
+
exports.toOpenAITurnDetection = toOpenAITurnDetection;
|
|
13
|
+
exports.normalizeCloudVoiceTurnDetection = normalizeCloudVoiceTurnDetection;
|
|
14
|
+
exports.normalizeCloudVoiceInputNoiseReduction = normalizeCloudVoiceInputNoiseReduction;
|
|
15
|
+
exports.compileCartesiaTtsOptions = compileCartesiaTtsOptions;
|
|
16
|
+
const utils_1 = require("./utils");
|
|
17
|
+
exports.DEFAULT_CLOUD_VOICE_HUMANIZATION = {
|
|
18
|
+
enabled: true,
|
|
19
|
+
openingMode: "auto",
|
|
20
|
+
openingStyle: "brief",
|
|
21
|
+
fillerStyle: "light",
|
|
22
|
+
backchannelFrequency: "low",
|
|
23
|
+
disfluency: "rare",
|
|
24
|
+
toolLatencyFillerMs: 700,
|
|
25
|
+
conversationProfile: {
|
|
26
|
+
personality: "warm",
|
|
27
|
+
tone: "professional",
|
|
28
|
+
pacing: "concise",
|
|
29
|
+
unclearAudio: "ask_repeat",
|
|
30
|
+
confirmation: "critical_fields",
|
|
31
|
+
escalation: "when_blocked",
|
|
32
|
+
numberReadback: true,
|
|
33
|
+
},
|
|
34
|
+
};
|
|
35
|
+
// Numeric version tag exported for the cloud voice config.
// NOTE(review): how consumers compare or persist this value is not visible in this file — confirm before bumping.
exports.CLOUD_VOICE_CONFIG_VERSION = 2;
|
|
36
|
+
// String identifier exported for the prompt compiler revision.
// NOTE(review): presumably stamped onto compiled prompts elsewhere in the package — confirm against the prompt service.
exports.CLOUD_VOICE_PROMPT_COMPILER_VERSION = "cloud-voice-provider-native-v2";
|
|
37
|
+
exports.CLOUD_VOICE_PROVIDER_CAPABILITIES = {
|
|
38
|
+
"openai-realtime": {
|
|
39
|
+
canUpdateInstructionsLive: true,
|
|
40
|
+
supportsSessionResume: false,
|
|
41
|
+
supportsToolCalling: true,
|
|
42
|
+
supportsOutputAudioTranscripts: true,
|
|
43
|
+
supportsServerVadConfig: true,
|
|
44
|
+
supportsSemanticVad: true,
|
|
45
|
+
},
|
|
46
|
+
"gemini-live": {
|
|
47
|
+
canUpdateInstructionsLive: false,
|
|
48
|
+
supportsSessionResume: true,
|
|
49
|
+
supportsToolCalling: true,
|
|
50
|
+
supportsOutputAudioTranscripts: true,
|
|
51
|
+
supportsServerVadConfig: true,
|
|
52
|
+
supportsNativeAudioOptions: true,
|
|
53
|
+
supportsAffectiveDialog: true,
|
|
54
|
+
supportsProactiveAudio: true,
|
|
55
|
+
},
|
|
56
|
+
"kognitive-voice": {
|
|
57
|
+
canUpdateInstructionsLive: true,
|
|
58
|
+
supportsSessionResume: true,
|
|
59
|
+
supportsToolCalling: true,
|
|
60
|
+
supportsOutputAudioTranscripts: true,
|
|
61
|
+
supportsServerVadConfig: false,
|
|
62
|
+
supportsPipelineEotConfig: true,
|
|
63
|
+
supportsCartesiaTtsControls: true,
|
|
64
|
+
},
|
|
65
|
+
"xai-realtime": {
|
|
66
|
+
canUpdateInstructionsLive: true,
|
|
67
|
+
supportsSessionResume: false,
|
|
68
|
+
supportsToolCalling: true,
|
|
69
|
+
supportsOutputAudioTranscripts: true,
|
|
70
|
+
supportsServerVadConfig: true,
|
|
71
|
+
},
|
|
72
|
+
};
|
|
73
|
+
/**
 * Fallback pipeline description (transport, STT, LLM, TTS, turn handling).
 * normalizeCloudVoicePipelineConfig and compileCartesiaTtsOptions read their
 * defaults from here.
 */
const DEFAULT_KOGNITIVE_PIPELINE = {
    transport: {
        type: "websocket",
        provider: "kognitive-websocket",
    },
    stt: {
        provider: "deepgram",
        model: "nova-3",
        language: "en",
    },
    llm: {
        provider: "openai",
        model: "gpt-4o-mini",
    },
    tts: {
        provider: "cartesia",
        model: "sonic-3",
        voice: "a0e99841-438c-4a64-b679-ae501e7d6091",
    },
    turn: {
        interruptResponse: true,
        createResponse: true,
        prefixPaddingMs: 300,
        silenceDurationMs: 650,
    },
};
|
|
85
|
+
/**
 * Language code -> English language name, consulted by toLanguageInstruction.
 * Each language is listed under its bare code and its regional code(s).
 */
const SPEECH_LANGUAGE_INSTRUCTIONS = {
    en: "English", "en-US": "English", "en-GB": "English",
    tr: "Turkish", "tr-TR": "Turkish",
    de: "German", "de-DE": "German",
    fr: "French", "fr-FR": "French",
    es: "Spanish", "es-ES": "Spanish",
    it: "Italian", "it-IT": "Italian",
    pt: "Portuguese", "pt-PT": "Portuguese", "pt-BR": "Portuguese",
    nl: "Dutch", "nl-NL": "Dutch",
    sv: "Swedish", "sv-SE": "Swedish",
    no: "Norwegian", "nb-NO": "Norwegian",
    da: "Danish", "da-DK": "Danish",
    fi: "Finnish", "fi-FI": "Finnish",
    pl: "Polish", "pl-PL": "Polish",
    cs: "Czech", "cs-CZ": "Czech",
    ro: "Romanian", "ro-RO": "Romanian",
    el: "Greek", "el-GR": "Greek",
    ru: "Russian", "ru-RU": "Russian",
    uk: "Ukrainian", "uk-UA": "Ukrainian",
};
|
|
125
|
+
/**
 * Bare language code -> default regional code, used by
 * resolveCloudVoiceSpeechLanguageCode when only a bare language is configured.
 */
const SPEECH_LANGUAGE_CODES = {
    en: "en-US", tr: "tr-TR", de: "de-DE",
    fr: "fr-FR", es: "es-ES", it: "it-IT",
    pt: "pt-PT", nl: "nl-NL", sv: "sv-SE",
    no: "nb-NO", da: "da-DK", fi: "fi-FI",
    pl: "pl-PL", cs: "cs-CZ", ro: "ro-RO",
    el: "el-GR", ru: "ru-RU", uk: "uk-UA",
};
|
|
145
|
+
/**
 * Lower-cased accent label -> regional language code. Callers lower-case the
 * configured accent before looking it up here.
 */
const SPEECH_ACCENT_LANGUAGE_CODES = {
    // English
    "neutral english": "en-US",
    "british english": "en-GB",
    "irish english": "en-IE",
    "scottish english": "en-GB",
    // Turkish
    "standard istanbul turkish": "tr-TR",
    "aegean turkish": "tr-TR",
    "anatolian turkish": "tr-TR",
    // German
    "standard german": "de-DE",
    "austrian german": "de-AT",
    "swiss german influenced standard german": "de-CH",
    // French
    "standard french from france": "fr-FR",
    "belgian french": "fr-BE",
    "swiss french": "fr-CH",
    // Spanish
    "castilian spanish": "es-ES",
    "andalusian spanish": "es-ES",
    "canarian spanish": "es-ES",
};
|
|
163
|
+
/**
 * Reports whether `channel` is one of the telephony channels
 * ("phone", "sip" or "outbound").
 */
function isPhoneChannel(channel) {
    return ["phone", "sip", "outbound"].includes(channel);
}
|
|
166
|
+
/**
 * Builds a complete humanization config from loosely-typed input.
 *
 * Every enum-like field is kept only when it matches one of its accepted
 * literals; otherwise the value from DEFAULT_CLOUD_VOICE_HUMANIZATION is used.
 * `toolLatencyFillerMs` must be a finite number and is rounded and clamped to
 * be non-negative. The conversation profile is read from `conversationProfile`
 * (or `profile` when that is null/undefined), with the whole input record
 * passed along as the legacy source.
 *
 * @param value Raw humanization settings (any shape).
 * @returns A fully-populated humanization config object.
 */
function normalizeCloudVoiceHumanizationConfig(value) {
    const base = exports.DEFAULT_CLOUD_VOICE_HUMANIZATION;
    const source = (0, utils_1.getRecord)(value);
    // Keeps `candidate` only when it is one of the accepted literals.
    const oneOf = (candidate, accepted, fallback) => (accepted.includes(candidate) ? candidate : fallback);
    let toolLatencyFillerMs = base.toolLatencyFillerMs;
    if (typeof source.toolLatencyFillerMs === "number" && Number.isFinite(source.toolLatencyFillerMs)) {
        toolLatencyFillerMs = Math.max(0, Math.round(source.toolLatencyFillerMs));
    }
    // `conversationProfile` wins; `profile` is only consulted when it is null/undefined.
    const profileInput = source.conversationProfile !== null && source.conversationProfile !== undefined
        ? source.conversationProfile
        : source.profile;
    return {
        enabled: typeof source.enabled === "boolean" ? source.enabled : base.enabled,
        openingMode: oneOf(source.openingMode, ["wait", "auto"], base.openingMode),
        openingStyle: oneOf(source.openingStyle, ["warm", "professional", "brief"], base.openingStyle),
        fillerStyle: oneOf(source.fillerStyle, ["off", "natural", "light"], base.fillerStyle),
        backchannelFrequency: oneOf(source.backchannelFrequency, ["off", "medium", "low"], base.backchannelFrequency),
        disfluency: oneOf(source.disfluency, ["off", "rare"], base.disfluency),
        toolLatencyFillerMs,
        conversationProfile: normalizeCloudVoiceConversationProfile(profileInput, source),
    };
}
|
|
198
|
+
/**
 * Builds a complete conversation profile from loosely-typed input.
 *
 * Each field is kept only when it matches one of its accepted literals.
 * When `personality`, `tone` or `pacing` is missing, the legacy humanization
 * fields (`openingStyle`, `fillerStyle`, `backchannelFrequency`) are used to
 * derive a value before falling back to the defaults.
 *
 * @param value Raw conversation profile (any shape).
 * @param legacy Record holding the legacy humanization fields. A `null` or
 *   non-object value is treated as empty instead of throwing.
 * @returns A fully-populated conversation profile.
 */
function normalizeCloudVoiceConversationProfile(value, legacy = {}) {
    const record = (0, utils_1.getRecord)(value);
    // The `= {}` default only applies to `undefined`; an explicit `null` (or a
    // primitive) would otherwise throw on the property reads below.
    const legacyRecord = legacy !== null && typeof legacy === "object" ? legacy : {};
    const openingStyle = (0, utils_1.getString)(legacyRecord.openingStyle, "");
    const fillerStyle = (0, utils_1.getString)(legacyRecord.fillerStyle, "");
    const backchannelFrequency = (0, utils_1.getString)(legacyRecord.backchannelFrequency, "");
    const defaults = exports.DEFAULT_CLOUD_VOICE_HUMANIZATION.conversationProfile;
    // Keeps `candidate` only when it is one of the accepted literals.
    const oneOf = (candidate, accepted, fallback) => (accepted.includes(candidate) ? candidate : fallback);
    return {
        // Legacy "professional" opening style maps to the "expert" personality.
        personality: oneOf(record.personality, ["neutral", "warm", "expert", "concierge"], openingStyle === "professional" ? "expert" : "warm"),
        // Legacy "warm" opening style maps to the "empathetic" tone.
        tone: oneOf(record.tone, ["casual", "professional", "empathetic", "polished"], openingStyle === "warm" ? "empathetic" : defaults.tone),
        // Legacy natural fillers or medium backchannels imply "measured" pacing.
        pacing: oneOf(record.pacing, ["concise", "measured", "deliberate", "energetic"], fillerStyle === "natural" || backchannelFrequency === "medium" ? "measured" : defaults.pacing),
        unclearAudio: oneOf(record.unclearAudio, ["ask_repeat", "confirm_best_guess"], defaults.unclearAudio),
        confirmation: oneOf(record.confirmation, ["critical_fields", "all_actions", "minimal"], defaults.confirmation),
        escalation: oneOf(record.escalation, ["when_blocked", "on_request", "never"], defaults.escalation),
        numberReadback: typeof record.numberReadback === "boolean" ? record.numberReadback : defaults.numberReadback,
    };
}
|
|
233
|
+
/**
 * Returns the capability flags for `provider`, or the "openai-realtime"
 * entry when the lookup yields null/undefined.
 */
function getCloudVoiceProviderCapabilities(provider) {
    const table = exports.CLOUD_VOICE_PROVIDER_CAPABILITIES;
    const match = table[provider];
    if (match === null || match === undefined) {
        return table["openai-realtime"];
    }
    return match;
}
|
|
237
|
+
/**
 * Extracts and cleans `config.metadata.speech`.
 *
 * The known keys (language, accent, style, pace, emotion) are kept only when
 * they are non-blank strings, and are trimmed. Any other key is copied through
 * unchanged unless its value is `undefined`.
 *
 * @param config Agent config whose `metadata.speech` is read.
 * @returns The cleaned speech record, or `undefined` when nothing remains.
 */
function normalizeSpeechConfig(config) {
    const speech = (0, utils_1.getRecord)((0, utils_1.getRecord)(config.metadata).speech);
    const knownKeys = ["language", "accent", "style", "pace", "emotion"];
    const normalized = {};
    for (const key of knownKeys) {
        const value = speech[key];
        if (typeof value === "string" && value.trim())
            normalized[key] = value.trim();
    }
    for (const [key, value] of Object.entries(speech)) {
        if (knownKeys.includes(key) || value === undefined)
            continue;
        // An own "__proto__" key (possible after JSON.parse) must not be assigned:
        // `normalized["__proto__"] = obj` would swap the result's prototype and let
        // inherited fields such as `accent` leak into later reads.
        if (key === "__proto__")
            continue;
        normalized[key] = value;
    }
    return Object.keys(normalized).length > 0 ? normalized : undefined;
}
|
|
253
|
+
/**
 * Maps a language code to the language name listed in
 * SPEECH_LANGUAGE_INSTRUCTIONS.
 *
 * The lookup tries the value as given, then lower-cased, then in canonical
 * `xx-YY` casing so that inputs such as "en-us" or "PT-br" resolve. Only own
 * keys of the table are considered, so names like "constructor" are not
 * resolved through the prototype chain.
 *
 * @param value Language code or free-form language name.
 * @returns The mapped name, the input itself when unmapped, or `undefined`
 *   for a falsy input.
 */
function toLanguageInstruction(value) {
    if (!value)
        return undefined;
    const lookup = (key) => Object.prototype.hasOwnProperty.call(SPEECH_LANGUAGE_INSTRUCTIONS, key)
        ? SPEECH_LANGUAGE_INSTRUCTIONS[key]
        : undefined;
    const lowered = value.toLowerCase();
    const parts = lowered.split("-");
    // Table keys use lower-case language + upper-case region ("en-US").
    const canonical = parts.length === 2 ? parts[0] + "-" + parts[1].toUpperCase() : lowered;
    for (const candidate of [value, lowered, canonical]) {
        const label = lookup(candidate);
        if (label !== undefined)
            return label;
    }
    return value;
}
|
|
257
|
+
/**
 * Resolves the regional language code for an agent's speech settings.
 *
 * Resolution order:
 *  1. a configured accent listed in SPEECH_ACCENT_LANGUAGE_CODES;
 *  2. a language already shaped like `xx-YY` (returned unchanged);
 *  3. a bare language listed in SPEECH_LANGUAGE_CODES (exact, then lower-cased);
 *  4. the configured language as given.
 *
 * Table lookups only consider own keys, so values such as "constructor" can
 * never resolve to an inherited Object.prototype member.
 *
 * @param config Agent config passed to normalizeSpeechConfig.
 * @returns The language code, or `undefined` when no language is configured.
 */
function resolveCloudVoiceSpeechLanguageCode(config) {
    const ownValue = (table, key) => Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
    const speech = normalizeSpeechConfig(config);
    if (!speech)
        return undefined;
    if (typeof speech.accent === "string" && speech.accent) {
        const accentCode = ownValue(SPEECH_ACCENT_LANGUAGE_CODES, speech.accent.toLowerCase());
        if (accentCode)
            return accentCode;
    }
    const language = speech.language;
    if (!language)
        return undefined;
    // Already a region-qualified code: pass it through untouched.
    if (/^[a-z]{2,3}-[A-Z]{2}$/i.test(language))
        return language;
    const exact = ownValue(SPEECH_LANGUAGE_CODES, language);
    if (exact !== undefined)
        return exact;
    const lowered = ownValue(SPEECH_LANGUAGE_CODES, language.toLowerCase());
    return lowered !== undefined ? lowered : language;
}
|
|
270
|
+
/**
 * Returns the configured speech language with everything from the first "-"
 * onwards removed (e.g. "en-US" becomes "en"), or `undefined` when no
 * language is configured.
 */
function resolveCloudVoiceTranscriptionLanguage(config) {
    const speech = normalizeSpeechConfig(config);
    if (!speech || !speech.language) {
        return undefined;
    }
    const { language } = speech;
    const dash = language.indexOf("-");
    return dash === -1 ? language : language.slice(0, dash);
}
|
|
277
|
+
/**
 * Resolves the pipeline transport `{ type, provider }`.
 *
 * `type` comes from `config.pipeline.transport.type` when it is "webrtc" or
 * "websocket", otherwise from `config.transport`. `provider` comes from the
 * pipeline transport when it is "daily" or "kognitive-websocket"; otherwise it
 * is "daily" for webrtc and "kognitive-websocket" for everything else.
 */
function normalizePipelineTransport(config) {
    const pipeline = config.pipeline;
    const declared = (0, utils_1.getRecord)(pipeline === null || pipeline === undefined ? undefined : pipeline.transport);
    let type = config.transport;
    if (declared.type === "webrtc" || declared.type === "websocket") {
        type = declared.type;
    }
    let provider;
    if (declared.provider === "daily" || declared.provider === "kognitive-websocket") {
        provider = declared.provider;
    }
    else {
        provider = type === "webrtc" ? "daily" : "kognitive-websocket";
    }
    return { type, provider };
}
|
|
290
|
+
/**
 * Builds a complete pipeline config (transport, stt, llm, tts, turn and,
 * when provided, backgroundAudio) from `config.pipeline`, filling gaps from
 * DEFAULT_KOGNITIVE_PIPELINE.
 *
 * - `stt.language` is the trimmed pipeline value, else the language resolved
 *   from the speech settings; it is omitted when neither exists.
 * - `llm.model` falls back to `config.model`, then to the default model.
 * - `tts.voice` falls back to `config.voice` unless that is "alloy", then to
 *   the default voice. `tts.options` is included only when non-empty.
 * - `turn` is always a fresh object: defaults overlaid with any overrides, so
 *   callers can mutate the result without touching the shared defaults.
 *
 * @param config Agent config.
 * @returns The normalized pipeline config.
 */
function normalizeCloudVoicePipelineConfig(config) {
    const defaults = DEFAULT_KOGNITIVE_PIPELINE;
    const pipeline = (0, utils_1.getRecord)(config.pipeline);
    const stt = (0, utils_1.getRecord)(pipeline.stt);
    const llm = (0, utils_1.getRecord)(pipeline.llm);
    const tts = (0, utils_1.getRecord)(pipeline.tts);
    const turn = (0, utils_1.getRecord)(pipeline.turn);
    const backgroundAudio = (0, utils_1.getRecord)(pipeline.backgroundAudio);
    const ttsOptions = (0, utils_1.getRecord)(tts.options);
    const sttLanguage = typeof stt.language === "string" && stt.language.trim()
        ? stt.language.trim()
        : resolveCloudVoiceTranscriptionLanguage(config);
    // "alloy" is not accepted as a pipeline TTS voice; use the default instead.
    const fallbackVoice = config.voice && config.voice !== "alloy" ? config.voice : defaults.tts.voice;
    return Object.assign({
        transport: normalizePipelineTransport(config),
        stt: Object.assign({
            provider: (0, utils_1.getString)(stt.provider, defaults.stt.provider),
            model: (0, utils_1.getString)(stt.model, defaults.stt.model),
        }, sttLanguage ? { language: sttLanguage } : {}),
        llm: {
            provider: (0, utils_1.getString)(llm.provider, defaults.llm.provider),
            model: (0, utils_1.getString)(llm.model, config.model || defaults.llm.model),
        },
        tts: Object.assign({
            provider: (0, utils_1.getString)(tts.provider, defaults.tts.provider),
            model: (0, utils_1.getString)(tts.model, defaults.tts.model),
            voice: (0, utils_1.getString)(tts.voice, fallbackVoice),
        }, Object.keys(ttsOptions).length > 0 ? { options: ttsOptions } : {}),
        // Always copy: handing out defaults.turn itself would let a caller's
        // mutation change the defaults for every later call.
        turn: Object.assign({}, defaults.turn, turn),
    }, Object.keys(backgroundAudio).length > 0 ? { backgroundAudio } : {});
}
|
|
307
|
+
/**
 * Prepares the transcription settings for a session.
 *
 * Returns `null` when `config.transcription` is explicitly `null`. Otherwise
 * returns the transcription record, with `language` overridden by the
 * language resolved from the speech settings when one is configured.
 */
function toPreparedTranscription(config) {
    if (config.transcription === null) {
        return null;
    }
    const base = (0, utils_1.getRecord)(config.transcription);
    const language = resolveCloudVoiceTranscriptionLanguage(config);
    if (!language) {
        return base;
    }
    // Copy so the caller's transcription object is left untouched.
    return Object.assign({}, base, { language });
}
|
|
315
|
+
/**
 * Converts a turn-detection record into the snake_case shape.
 *
 * `null` passes through as `null`; a record without a non-empty string `type`
 * yields `undefined`. For the aliased fields the snake_case key is preferred
 * and the camelCase key is used only when the snake_case one is missing or of
 * the wrong type. `threshold` and `eagerness` are copied when correctly typed.
 *
 * @param value Turn-detection settings (any shape) or `null`.
 * @returns The converted record, `null`, or `undefined`.
 */
function toOpenAITurnDetection(value) {
    if (value === null) {
        return null;
    }
    const source = (0, utils_1.getRecord)(value);
    if (typeof source.type !== "string" || !source.type) {
        return undefined;
    }
    const result = { type: source.type };
    // [output key, camelCase alias, required typeof]
    const aliased = [
        ["create_response", "createResponse", "boolean"],
        ["interrupt_response", "interruptResponse", "boolean"],
        ["prefix_padding_ms", "prefixPaddingMs", "number"],
        ["silence_duration_ms", "silenceDurationMs", "number"],
    ];
    for (const [wireKey, camelKey, kind] of aliased) {
        if (typeof source[wireKey] === kind) {
            result[wireKey] = source[wireKey];
        }
        else if (typeof source[camelKey] === kind) {
            result[wireKey] = source[camelKey];
        }
    }
    // Fields that have a single spelling.
    const direct = [
        ["threshold", "number"],
        ["eagerness", "string"],
    ];
    for (const [key, kind] of direct) {
        if (typeof source[key] === kind) {
            result[key] = source[key];
        }
    }
    return result;
}
|
|
355
|
+
/**
 * Produces provider-specific turn-detection settings with defaults applied.
 *
 * - `null` input passes through as `null`.
 * - "openai-realtime" on a non-phone channel: type "off" yields `null`;
 *   type "semantic_vad" (the default) yields a semantic VAD record. Any other
 *   type falls through to the generic record below.
 * - "gemini-live": a "server_vad" record with sensitivity settings.
 * - "kognitive-voice": a "flux_eot" record with end-of-turn thresholds.
 * - Everything else: a generic record whose type defaults to "server_vad".
 *
 * @param provider Voice provider id.
 * @param channel Session channel, used to detect phone channels.
 * @param value Raw turn-detection settings (any shape) or `null`.
 * @returns The normalized record, or `null` when detection is disabled.
 */
function normalizeCloudVoiceTurnDetection(provider, channel, value) {
    if (value === null) {
        return null;
    }
    const source = (0, utils_1.getRecord)(value);
    const flag = (candidate, fallback) => (typeof candidate === "boolean" ? candidate : fallback);
    const num = (candidate, fallback) => (typeof candidate === "number" ? candidate : fallback);
    if (provider === "openai-realtime" && !isPhoneChannel(channel)) {
        const requested = (0, utils_1.getString)(source.type, "semantic_vad");
        if (requested === "off") {
            return null;
        }
        if (requested === "semantic_vad") {
            return {
                type: "semantic_vad",
                createResponse: flag(source.createResponse, true),
                interruptResponse: flag(source.interruptResponse, true),
                eagerness: (0, utils_1.getString)(source.eagerness, "low") || "low",
            };
        }
        // Other requested types use the generic record at the end.
    }
    switch (provider) {
        case "gemini-live":
            return {
                type: "server_vad",
                createResponse: flag(source.createResponse, true),
                interruptResponse: flag(source.interruptResponse, true),
                prefixPaddingMs: num(source.prefixPaddingMs, 120),
                silenceDurationMs: num(source.silenceDurationMs, 700),
                startOfSpeechSensitivity: (0, utils_1.getString)(source.startOfSpeechSensitivity, "START_SENSITIVITY_LOW"),
                endOfSpeechSensitivity: (0, utils_1.getString)(source.endOfSpeechSensitivity, "END_SENSITIVITY_LOW"),
                disabled: flag(source.disabled, false),
            };
        case "kognitive-voice":
            return {
                type: "flux_eot",
                eager_eot_threshold: num(source.eager_eot_threshold, 0.85),
                eot_threshold: num(source.eot_threshold, 0.65),
                eot_timeout_ms: num(source.eot_timeout_ms, 900),
            };
        default:
            return {
                type: (0, utils_1.getString)(source.type, "server_vad") || "server_vad",
                createResponse: flag(source.createResponse, true),
                interruptResponse: flag(source.interruptResponse, true),
                threshold: num(source.threshold, 0.6),
                prefixPaddingMs: num(source.prefixPaddingMs, 300),
                silenceDurationMs: num(source.silenceDurationMs, 700),
            };
    }
}
|
|
402
|
+
/**
 * Resolves the input noise-reduction setting.
 *
 * `null` passes through as `null`. A non-empty record is returned as-is.
 * With no explicit setting, "openai-realtime" on a phone channel gets
 * `{ type: "near_field" }`; every other combination gets `null`.
 */
function normalizeCloudVoiceInputNoiseReduction(provider, channel, value) {
    if (value === null) {
        return null;
    }
    const explicit = (0, utils_1.getRecord)(value);
    if (Object.keys(explicit).length !== 0) {
        return explicit;
    }
    const wantsDefault = provider === "openai-realtime" && isPhoneChannel(channel);
    return wantsDefault ? { type: "near_field" } : null;
}
|
|
413
|
+
/**
 * Compiles the Cartesia TTS options for an agent config.
 *
 * The pipeline is read from `config.pipeline`, else `providerOptions.pipeline`.
 * The Cartesia settings are read from `tts.options`, else
 * `tts.providerOptions`, else `providerOptions.cartesia`.
 *
 * - `modelId`: `tts.model`, else the Cartesia `modelId`, else "sonic-3".
 * - `voice`: `tts.voice`, else `config.voice` unless it is "alloy", else the
 *   default pipeline voice.
 * - `speed`: an explicit Cartesia speed (number or "slow"/"normal"/"fast"),
 *   otherwise derived from keywords in the speech `pace`.
 * - `emotion`, `pronunciationDictId`, `contextMode`: set only when present
 *   and valid.
 *
 * @param config Agent config.
 * @returns The options object.
 */
function compileCartesiaTtsOptions(config) {
    // First argument that is neither null nor undefined; the last one otherwise.
    const firstDefined = (...candidates) => {
        for (const candidate of candidates) {
            if (candidate !== null && candidate !== undefined) {
                return candidate;
            }
        }
        return candidates[candidates.length - 1];
    };
    const providerOptions = (0, utils_1.getRecord)(config.providerOptions);
    const pipeline = (0, utils_1.getRecord)(firstDefined(config.pipeline, providerOptions.pipeline));
    const tts = (0, utils_1.getRecord)(pipeline.tts);
    const cartesia = (0, utils_1.getRecord)(firstDefined(tts.options, tts.providerOptions, providerOptions.cartesia));
    const speech = normalizeSpeechConfig(config);
    const fallbackVoice = config.voice && config.voice !== "alloy" ? config.voice : DEFAULT_KOGNITIVE_PIPELINE.tts.voice;
    const options = {
        modelId: (0, utils_1.getString)(tts.model, (0, utils_1.getString)(cartesia.modelId, "sonic-3")),
        voice: (0, utils_1.getString)(tts.voice, fallbackVoice),
    };
    const pace = (0, utils_1.getString)(speech ? speech.pace : undefined, "").toLowerCase();
    const explicitSpeed = typeof cartesia.speed === "number" || ["slow", "normal", "fast"].includes(cartesia.speed);
    if (explicitSpeed) {
        options.speed = cartesia.speed;
    }
    else if (pace.includes("slow")) {
        options.speed = "slow";
    }
    else if (pace.includes("fast") || pace.includes("energetic")) {
        options.speed = "fast";
    }
    else {
        options.speed = "normal";
    }
    const emotion = (0, utils_1.getString)(speech ? speech.emotion : undefined, "");
    if (emotion) {
        options.emotion = [emotion];
    }
    const dictId = cartesia.pronunciationDictId;
    if (typeof dictId === "string" && dictId.trim()) {
        options.pronunciationDictId = dictId.trim();
    }
    if (cartesia.contextMode === "reset" || cartesia.contextMode === "continue") {
        options.contextMode = cartesia.contextMode;
    }
    return options;
}
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import type {
    CloudVoiceAgentConfig,
    CloudVoiceAgentSnapshot,
    CloudVoiceChannel,
    CloudVoiceClientToolManifest,
    CloudVoiceFunctionToolManifest,
    CloudVoiceParameterResolutionResult,
    CloudVoicePhonePrepareSnapshot,
    CloudVoiceToolBinding,
    CompiledCloudVoiceFlowInstructions,
    PreparedCloudVoiceConfig,
} from "./types";

// Re-exports from the runtime service module.
export {
    CLOUD_VOICE_CONFIG_VERSION,
    CLOUD_VOICE_PROMPT_COMPILER_VERSION,
    CLOUD_VOICE_PROVIDER_CAPABILITIES,
    DEFAULT_CLOUD_VOICE_HUMANIZATION,
    compileCartesiaTtsOptions,
    getCloudVoiceProviderCapabilities,
    normalizeCloudVoiceConversationProfile,
    normalizeCloudVoiceHumanizationConfig,
    normalizeCloudVoicePipelineConfig,
    normalizeCloudVoiceTurnDetection,
    resolveCloudVoiceSpeechLanguageCode,
    toOpenAITurnDetection,
    toPreparedTranscription,
} from "./cloud-voice-runtime-service";

// Re-exports from the prompt service module.
export {
    buildPhoneControlRule,
    buildPhoneOpeningPrompt,
    compileCloudVoiceInstructions,
    resolveCloudVoiceProviderSystemPrompt,
} from "./cloud-voice-prompt-service";

/** Set of channel values. NOTE(review): contents are defined in the implementation, not visible here. */
export declare const CLOUD_VOICE_CHANNELS: Set<CloudVoiceChannel>;

type CloudVoiceGraphEntryMode = "incoming" | "outgoing" | "both";

/** Function tool manifest for the phone hang-up tool. */
export declare const CLOUD_VOICE_PHONE_HANGUP_TOOL: CloudVoiceFunctionToolManifest;

/** Function tool manifest for the SIP transfer tool. */
export declare const CLOUD_VOICE_SIP_TRANSFER_TOOL: CloudVoiceFunctionToolManifest;

/** Coerces an arbitrary value to a channel. NOTE(review): fallback behaviour is not visible here. */
export declare function resolveCloudVoiceChannel(value: unknown): CloudVoiceChannel;

/** Type guard: narrows `channel` to "phone" | "sip" | "outbound" when it returns true. */
export declare function shouldInjectPhoneControlTools(
    channel?: CloudVoiceChannel,
): channel is "phone" | "sip" | "outbound";

/** Decides from the config metadata and channel whether SIP control tools apply. */
export declare function shouldInjectSipControlTools(
    config: Pick<CloudVoiceAgentConfig, "metadata">,
    channel?: CloudVoiceChannel,
): boolean;

/** Type guard: narrows `channel` to "phone" | "sip" | "outbound" when it returns true. */
export declare function shouldPersistPhonePrepareSnapshot(
    channel: CloudVoiceChannel,
): channel is "phone" | "sip" | "outbound";

/** Returns the input schema record for a tool binding. */
export declare function resolveToolInputSchema(tool: CloudVoiceToolBinding): Record<string, unknown>;

/** Version identifier of the flow-instruction compiler. */
export declare const CLOUD_VOICE_FLOW_INSTRUCTION_COMPILER_VERSION = "voice-flow-paths-v1";

/**
 * Compiles a voice graph into flow instructions.
 *
 * @returns The compiled instructions, or `null`. NOTE(review): the conditions
 *   that produce `null` are defined in the implementation, not visible here.
 */
export declare function compileVoiceGraphToInstructions(
    graphInput: unknown,
    options?: {
        tools?: CloudVoiceToolBinding[];
        entryMode?: CloudVoiceGraphEntryMode;
        channel?: CloudVoiceChannel;
    },
): CompiledCloudVoiceFlowInstructions | null;

/** Returns an agent config derived from `config`. NOTE(review): confirm whether the input is copied or mutated. */
export declare function compileCloudVoiceGraphConfig(config: CloudVoiceAgentConfig): CloudVoiceAgentConfig;

/** Converts an arbitrary value into a list of client tool manifests. */
export declare function normalizeClientToolManifests(value: unknown): CloudVoiceClientToolManifest[];

/** Prepares the session config for an agent from its config and per-session input. */
export declare function prepareCloudVoiceSessionConfig(
    config: CloudVoiceAgentConfig,
    input: {
        agentName: string;
        sessionId: string;
        resourceId: Record<string, unknown>;
        channel?: CloudVoiceChannel;
        clientTools?: CloudVoiceClientToolManifest[];
        parameters?: Record<string, unknown>;
        parameterResolution?: CloudVoiceParameterResolutionResult;
    },
): PreparedCloudVoiceConfig;

/** Creates a phone prepare snapshot. `now` is optional; its default is not visible here. */
export declare function createPhonePrepareSnapshot(input: {
    agent: CloudVoiceAgentSnapshot;
    channel: CloudVoiceChannel;
    config: CloudVoiceAgentConfig;
    prepare: PreparedCloudVoiceConfig;
    now?: Date;
}): CloudVoicePhonePrepareSnapshot;
|