@kognitivedev/backend-cloud 0.2.29
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.turbo/turbo-build.log +2 -0
- package/.turbo/turbo-test.log +14 -0
- package/CHANGELOG.md +11 -0
- package/README.md +88 -0
- package/dist/cloud-voice-parameters.d.ts +11 -0
- package/dist/cloud-voice-parameters.js +219 -0
- package/dist/cloud-voice-prompt-service.d.ts +24 -0
- package/dist/cloud-voice-prompt-service.js +382 -0
- package/dist/cloud-voice-runtime-service.d.ts +73 -0
- package/dist/cloud-voice-runtime-service.js +443 -0
- package/dist/cloud-voice.d.ts +36 -0
- package/dist/cloud-voice.js +683 -0
- package/dist/index.d.ts +10 -0
- package/dist/index.js +26 -0
- package/dist/phone-control.d.ts +50 -0
- package/dist/phone-control.js +97 -0
- package/dist/phone-runtime/audio-playout-tracker.d.ts +51 -0
- package/dist/phone-runtime/audio-playout-tracker.js +93 -0
- package/dist/phone-runtime/openai-twilio-realtime.d.ts +95 -0
- package/dist/phone-runtime/openai-twilio-realtime.js +1074 -0
- package/dist/tools.d.ts +2 -0
- package/dist/tools.js +216 -0
- package/dist/types.d.ts +468 -0
- package/dist/types.js +2 -0
- package/dist/utils.d.ts +3 -0
- package/dist/utils.js +14 -0
- package/package.json +47 -0
- package/src/__tests__/audio-playout-tracker.test.ts +46 -0
- package/src/__tests__/cloud-voice.test.ts +1006 -0
- package/src/__tests__/openai-twilio-realtime.test.ts +1193 -0
- package/src/__tests__/phone-control.test.ts +105 -0
- package/src/cloud-voice-parameters.ts +236 -0
- package/src/cloud-voice-prompt-service.ts +493 -0
- package/src/cloud-voice-runtime-service.ts +465 -0
- package/src/cloud-voice.ts +831 -0
- package/src/index.ts +10 -0
- package/src/phone-control.ts +156 -0
- package/src/phone-runtime/audio-playout-tracker.ts +132 -0
- package/src/phone-runtime/openai-twilio-realtime.ts +1250 -0
- package/src/tools.ts +227 -0
- package/src/types.ts +529 -0
- package/src/utils.ts +11 -0
- package/tsconfig.json +13 -0
|
@@ -0,0 +1,493 @@
|
|
|
1
|
+
import type {
|
|
2
|
+
CloudVoiceAgentConfig,
|
|
3
|
+
CloudVoiceChannel,
|
|
4
|
+
} from "./types";
|
|
5
|
+
import {
|
|
6
|
+
normalizeCloudVoiceHumanizationConfig,
|
|
7
|
+
normalizeSpeechConfig,
|
|
8
|
+
toLanguageInstruction,
|
|
9
|
+
} from "./cloud-voice-runtime-service";
|
|
10
|
+
import { getRecord, getString } from "./utils";
|
|
11
|
+
|
|
12
|
+
/**
 * Reports whether the given channel is one of the telephony channels
 * ("phone", "sip" or "outbound"). Any other value, including `undefined`,
 * yields `false`.
 */
function isPhoneChannel(channel?: CloudVoiceChannel) {
  switch (channel) {
    case "phone":
    case "sip":
    case "outbound":
      return true;
    default:
      return false;
  }
}
|
|
15
|
+
|
|
16
|
+
/** Identifiers of the languages for which a prompt pack is defined in this module. */
type PromptLanguageId = "en" | "tr";
|
|
17
|
+
|
|
18
|
+
/**
 * Shape of one localized bundle of prompt text.
 *
 * Plain `string` members are inserted into prompts verbatim; function members
 * are templates that interpolate runtime values into a localized sentence.
 */
type PromptLanguagePack = {
  /** System prompt used when neither a voice system prompt nor authored instructions exist. */
  fallbackSystem: string;

  /** Section titles of the compiled instruction document. */
  headings: {
    identity: string;
    authoredInstructions: string;
    personality: string;
    speechDirection: string;
    unclearAudio: string;
    confirmation: string;
    toolSpeech: string;
    escalation: string;
    phoneRules: string;
    flowInstructions: string;
  };

  /** Labels placed in front of each speech-direction value. */
  labels: {
    language: string;
    style: string;
    accent: string;
    pace: string;
    emotion: string;
  };

  /** Lines of the identity section. */
  identity: {
    realtime: string;
    phone: string;
    noInternalNames: string;
    publicIdentityOnly: string;
    selectedLanguage: (language: string) => string;
  };

  /** Humanization directives, selected by filler / backchannel / disfluency settings. */
  naturalness: {
    liveConversation: string;
    shortSentences: string;
    acknowledge: string;
    noMarkdown: string;
    lightFillers: string;
    naturalFillers: string;
    lowBackchannels: string;
    mediumBackchannels: string;
    rareDisfluency: string;
  };

  /** Conversation-profile lines: persona templates plus policy alternatives. */
  profile: {
    personality: (value: string) => string;
    tone: (value: string) => string;
    pacing: (value: string) => string;
    unclearConfirmBestGuess: string;
    unclearAskRepeat: string;
    confirmAllActions: string;
    confirmMinimal: string;
    confirmCritical: string;
    escalateOnRequest: string;
    escalateNever: string;
    escalateWhenBlocked: string;
    readbackEnabled: string;
    readbackBrief: string;
  };

  /** Directives about what to say around tool calls. */
  tools: {
    beforeTool: string;
    afterTool: string;
    toolFails: string;
  };

  /** Rules about ending a phone call. */
  phone: {
    doNotEndAfterTask: string;
    askClosingQuestion: string;
    callHangupOnlyAfterClose: string;
    sayGoodbyeBeforeHangup: string;
    doNotRevealTools: string;
  };

  /** Sentences used to build the prompt that opens a phone call. */
  opening: {
    exactFirstMessage: (firstMessage: string) => string;
    continueFlow: string;
    noInternalNames: string;
    startGreeting: (style: string, language: string) => string;
    publicIdentityOnly: string;
    outboundRightPerson: string;
    askHowCanHelp: string;
    oneShortSentence: string;
    /** Localized adjectives interpolated into `startGreeting`. */
    styles: {
      warm: string;
      professional: string;
      brief: string;
    };
  };
};
|
|
100
|
+
|
|
101
|
+
/** English prompt text. */
const ENGLISH_PROMPT_PACK: PromptLanguagePack = {
  fallbackSystem: "You are a helpful phone voice agent. Keep responses concise and natural.",
  headings: {
    identity: "Identity",
    authoredInstructions: "Authored Instructions",
    personality: "Personality, Tone, and Pacing",
    speechDirection: "Speech Direction",
    unclearAudio: "Unclear Audio",
    confirmation: "Confirmation and Readback",
    toolSpeech: "Tool Speech",
    escalation: "Escalation and Handoff",
    phoneRules: "Phone Rules",
    flowInstructions: "Flow Instructions",
  },
  labels: {
    language: "Language",
    style: "Style",
    accent: "Accent",
    pace: "Pace",
    emotion: "Emotion",
  },
  identity: {
    realtime: "You are a realtime voice agent.",
    phone: "You are a realtime phone voice agent.",
    noInternalNames: "Never tell the caller your internal agent name, agent number, slug, node name, workflow name, or implementation details.",
    publicIdentityOnly: "If you need to identify yourself, use only the public identity explicitly written in the authored instructions or first-message text.",
    selectedLanguage: (language) => `Use ${language} for caller-facing speech. If authored instructions are written in another language, follow their meaning but speak ${language}.`,
  },
  naturalness: {
    liveConversation: "- Speak like a natural live conversation, not written chat.",
    shortSentences: "- Use short spoken sentences and contractions.",
    acknowledge: "- Acknowledge the caller before answering when it fits.",
    noMarkdown: "- Do not use markdown, bullet points, or long lists unless the caller asks for them.",
    lightFillers: '- Use light fillers only when useful, such as "okay", "one sec", or "let me check".',
    naturalFillers: '- Use natural, brief fillers when useful, such as "hm", "okay", "right", "one sec", or "let me check".',
    lowBackchannels: '- Use occasional short acknowledgements such as "got it" or "sure" without interrupting the caller.',
    mediumBackchannels: "- Use short acknowledgements naturally when the caller gives details, but keep them brief.",
    rareDisfluency: '- Rarely use "uh" or "um"; never repeat fillers or sound uncertain on purpose.',
  },
  profile: {
    personality: (value) => `- Personality: ${value}.`,
    tone: (value) => `- Tone: ${value}.`,
    pacing: (value) => `- Pacing: ${value}.`,
    unclearConfirmBestGuess: "- If audio is unclear, briefly state what you think you heard and ask the caller to confirm.",
    unclearAskRepeat: "- If audio is unclear, ask the caller to repeat that part instead of guessing.",
    confirmAllActions: "- Confirm before every external action, transfer, booking, cancellation, or account change.",
    confirmMinimal: "- Confirm only when the caller asks for a change or when ambiguity could cause harm.",
    confirmCritical: "- Confirm critical fields before acting, including names, phone numbers, emails, order ids, dates, times, addresses, prices, and account changes.",
    escalateOnRequest: "- Escalate or hand off when the caller asks for a person.",
    escalateNever: "- Do not escalate unless a configured flow explicitly routes to a transfer.",
    escalateWhenBlocked: "- Escalate or hand off when blocked, when the caller is frustrated, or when policy/tool limits prevent a reliable answer.",
    readbackEnabled: "- Read back numbers, dates, times, and identifiers in small chunks before using them.",
    readbackBrief: "- Keep readbacks brief and only use them when the caller asks or the value is ambiguous.",
  },
  tools: {
    beforeTool: '- Before calling a tool while the caller is waiting, say a short acknowledgement such as "Let me check that" or "One sec".',
    afterTool: "- After a tool returns, summarize the result in plain spoken language.",
    toolFails: "- If a tool fails, apologize briefly and explain what can still be done.",
  },
  phone: {
    doNotEndAfterTask: "For phone calls, do not end the call just because you answered a question or completed one task.",
    askClosingQuestion: 'Before ending, ask a short closing question such as "Is there anything else I can help with?" unless the caller already clearly said goodbye.',
    callHangupOnlyAfterClose: 'Call hang_up_call only after the caller explicitly asks to end the call, says goodbye, or answers "no" or equivalent after your closing question.',
    sayGoodbyeBeforeHangup: 'Immediately before calling hang_up_call, say a brief natural goodbye such as "bye", "see you later", or "take care".',
    doNotRevealTools: "If the caller asks what tools, functions, abilities, or actions you have access to, answer in plain language and do not call hang_up_call.",
  },
  opening: {
    exactFirstMessage: (firstMessage) => `Start the phone call now: say exactly this first sentence, without adding any agent name, number, or extra introduction: ${firstMessage}`,
    continueFlow: "After that, continue naturally and follow the configured conversation flow.",
    noInternalNames: "Do not mention internal agent names, agent numbers, slugs, node names, or workflow names.",
    startGreeting: (style, language) => `Start the phone call now with a ${style} greeting in ${language}.`,
    publicIdentityOnly: "Only use a public human or company identity if it is explicitly present in the authored instructions; otherwise do not state a name.",
    outboundRightPerson: "If this is an outbound call, confirm you are speaking with the right person before continuing.",
    askHowCanHelp: "Ask how you can help.",
    oneShortSentence: "Keep the opening to one short sentence.",
    styles: {
      warm: "warm and friendly",
      professional: "professional and concise",
      brief: "brief and natural",
    },
  },
};

/** Turkish prompt text; mirrors the English pack key for key. */
const TURKISH_PROMPT_PACK: PromptLanguagePack = {
  fallbackSystem: "Yardımcı bir telefon sesli asistanısın. Yanıtlarını kısa, doğal ve konuşma diline uygun tut.",
  headings: {
    identity: "Kimlik",
    authoredInstructions: "Yazılmış Talimatlar",
    personality: "Kişilik, Ton ve Tempo",
    speechDirection: "Konuşma Yönlendirmesi",
    unclearAudio: "Anlaşılmayan Ses",
    confirmation: "Onay ve Geri Okuma",
    toolSpeech: "Araç Konuşması",
    escalation: "Aktarma ve İnsana Devretme",
    phoneRules: "Telefon Kuralları",
    flowInstructions: "Akış Talimatları",
  },
  labels: {
    language: "Dil",
    style: "Stil",
    accent: "Aksan",
    pace: "Tempo",
    emotion: "Duygu",
  },
  identity: {
    realtime: "Gerçek zamanlı bir sesli asistansın.",
    phone: "Gerçek zamanlı bir telefon sesli asistansın.",
    noInternalNames: "Arayana hiçbir zaman dahili agent adını, agent numarasını, slug'ı, düğüm adını, workflow adını veya uygulama detaylarını söyleme.",
    publicIdentityOnly: "Kendini tanıtman gerekiyorsa yalnızca yazılmış talimatlarda veya ilk mesaj metninde açıkça verilen kamusal kimliği kullan.",
    selectedLanguage: (language) => `Arayana dönük tüm konuşmalarda ${language} kullan. Yazılmış talimatlar başka dildeyse anlamını takip et ama ${language} konuş.`,
  },
  naturalness: {
    liveConversation: "- Yazılı sohbet gibi değil, doğal bir canlı konuşma gibi konuş.",
    shortSentences: "- Kısa, konuşma diline uygun cümleler kullan.",
    acknowledge: "- Uygun olduğunda cevaplamadan önce arayanı kısaca onayla.",
    noMarkdown: "- Arayan istemedikçe markdown, madde listesi veya uzun listeler kullanma.",
    lightFillers: '- Sadece gerekli olduğunda "tamam", "bir saniye" veya "hemen bakıyorum" gibi hafif geçiş ifadeleri kullan.',
    naturalFillers: '- Gerekli olduğunda "hmm", "tamam", "peki", "bir saniye" veya "hemen bakıyorum" gibi kısa ve doğal geçiş ifadeleri kullan.',
    lowBackchannels: '- Arayanı bölmeden ara sıra "anladım" veya "tabii" gibi kısa onaylar kullan.',
    mediumBackchannels: "- Arayan detay verirken doğal şekilde kısa onaylar kullan, ama bunları kısa tut.",
    rareDisfluency: '- "ee" veya "ıı" gibi dolgu seslerini nadiren kullan; dolgu ifadelerini tekrar etme ve bilerek kararsız duyulma.',
  },
  profile: {
    personality: (value) => `- Kişilik: ${value}.`,
    tone: (value) => `- Ton: ${value}.`,
    pacing: (value) => `- Tempo: ${value}.`,
    unclearConfirmBestGuess: "- Ses net değilse, ne duyduğunu kısaca söyle ve arayandan onay iste.",
    unclearAskRepeat: "- Ses net değilse tahmin etmek yerine arayandan o kısmı tekrar etmesini iste.",
    confirmAllActions: "- Her harici işlem, aktarma, randevu, iptal veya hesap değişikliğinden önce onay al.",
    confirmMinimal: "- Yalnızca arayan değişiklik istediğinde veya belirsizlik zarar verebilecekse onay al.",
    confirmCritical: "- İşlem yapmadan önce ad, telefon numarası, e-posta, sipariş numarası, tarih, saat, adres, fiyat ve hesap değişikliği gibi kritik alanları onayla.",
    escalateOnRequest: "- Arayan bir kişiyle görüşmek isterse aktar veya insana devret.",
    escalateNever: "- Yapılandırılmış akış açıkça aktarmaya yönlendirmedikçe insana devretme.",
    escalateWhenBlocked: "- Tıkanırsan, arayan gerilirse veya politika/araç sınırları güvenilir cevap vermeni engellerse aktar veya insana devret.",
    readbackEnabled: "- Numara, tarih, saat ve tanımlayıcıları kullanmadan önce küçük parçalar halinde geri oku.",
    readbackBrief: "- Geri okumaları kısa tut ve yalnızca arayan isterse veya değer belirsizse kullan.",
  },
  tools: {
    beforeTool: '- Arayan beklerken araç çağırmadan önce "hemen bakıyorum" veya "bir saniye" gibi kısa bir onay söyle.',
    afterTool: "- Araç sonucu döndüğünde sonucu sade konuşma diliyle özetle.",
    toolFails: "- Araç hata verirse kısaca özür dile ve hâlâ ne yapılabileceğini açıkla.",
  },
  phone: {
    doNotEndAfterTask: "Telefon görüşmelerinde yalnızca bir soruyu cevapladın veya bir işi tamamladın diye çağrıyı bitirme.",
    askClosingQuestion: 'Bitirmeden önce, arayan açıkça vedalaşmadıysa "Yardımcı olabileceğim başka bir konu var mı?" gibi kısa bir kapanış sorusu sor.',
    callHangupOnlyAfterClose: 'hang_up_call aracını yalnızca arayan açıkça çağrıyı bitirmek isterse, vedalaşırsa veya kapanış sorundan sonra "hayır" ya da eşdeğer bir yanıt verirse çağır.',
    sayGoodbyeBeforeHangup: 'hang_up_call aracını çağırmadan hemen önce "görüşmek üzere", "iyi günler" veya "hoşça kalın" gibi kısa ve doğal bir veda söyle.',
    doNotRevealTools: "Arayan hangi araçlara, fonksiyonlara, yeteneklere veya işlemlere erişebildiğini sorarsa sade dille cevap ver ve hang_up_call aracını çağırma.",
  },
  opening: {
    exactFirstMessage: (firstMessage) => `Telefon görüşmesini şimdi başlat: başka agent adı, numarası veya ek tanıtım eklemeden ilk cümle olarak aynen şunu söyle: ${firstMessage}`,
    continueFlow: "Sonrasında doğal şekilde devam et ve yapılandırılmış konuşma akışını takip et.",
    noInternalNames: "Dahili agent adlarını, agent numaralarını, slug'ları, düğüm adlarını veya workflow adlarını söyleme.",
    startGreeting: (style, language) => `Telefon görüşmesini şimdi ${language} dilinde ${style} bir selamlamayla başlat.`,
    publicIdentityOnly: "Yalnızca yazılmış talimatlarda açıkça geçiyorsa kamusal bir kişi veya şirket kimliği kullan; aksi halde isim belirtme.",
    outboundRightPerson: "Bu dış aramaysa, devam etmeden önce doğru kişiyle konuştuğunu teyit et.",
    askHowCanHelp: "Nasıl yardımcı olabileceğini sor.",
    oneShortSentence: "Açılışı tek kısa cümleyle sınırla.",
    styles: {
      warm: "sıcak ve samimi",
      professional: "profesyonel ve kısa",
      brief: "kısa ve doğal",
    },
  },
};

/** Prompt packs keyed by language id. */
const PROMPT_LANGUAGE_PACKS: Record<PromptLanguageId, PromptLanguagePack> = {
  en: ENGLISH_PROMPT_PACK,
  tr: TURKISH_PROMPT_PACK,
};
|
|
267
|
+
|
|
268
|
+
/**
 * Maps lower-cased language tags to the id of the prompt pack that serves them.
 * Tags that are not listed here have no dedicated pack.
 */
const LANGUAGE_ALIASES: Record<string, PromptLanguageId> = {
  // English
  "en": "en",
  "en-us": "en",
  "en-gb": "en",
  // Turkish
  "tr": "tr",
  "tr-tr": "tr",
};
|
|
275
|
+
|
|
276
|
+
/**
 * Determines which prompt pack language applies to an agent configuration.
 *
 * The language tag comes from the normalized speech configuration. It is
 * trimmed, lower-cased and `_` separators are rewritten to `-` before the
 * alias lookup. When the full tag has no alias, the primary subtag (the part
 * before the first `-`) is tried, so regional variants such as "tr-CY" or
 * "tr_TR" resolve to their base language instead of falling back to English.
 *
 * @param config Agent configuration (or a loose record) holding the speech settings.
 * @returns The matching pack id, or "en" when the tag is missing or unknown.
 */
function resolvePromptLanguage(config?: Pick<CloudVoiceAgentConfig, "metadata"> | Record<string, unknown>): PromptLanguageId {
  const speech = normalizeSpeechConfig(getRecord(config) as Pick<CloudVoiceAgentConfig, "metadata">) ?? {};
  const tag = getString(speech.language, "").trim().toLowerCase().replace(/_/g, "-");

  // Own-property lookup only: a plain index would also return inherited
  // members for tags such as "constructor" or "tostring", and those values
  // are not valid pack ids.
  const aliasFor = (key: string): PromptLanguageId | undefined =>
    Object.prototype.hasOwnProperty.call(LANGUAGE_ALIASES, key) ? LANGUAGE_ALIASES[key] : undefined;

  const primarySubtag = tag.split("-", 1)[0] ?? "";
  return aliasFor(tag) ?? aliasFor(primarySubtag) ?? "en";
}
|
|
281
|
+
|
|
282
|
+
/**
 * Returns the prompt pack for the language resolved from `config`.
 */
function getPromptLanguagePack(config?: Pick<CloudVoiceAgentConfig, "metadata"> | Record<string, unknown>) {
  const languageId = resolvePromptLanguage(config);
  const pack = PROMPT_LANGUAGE_PACKS[languageId];
  return pack;
}
|
|
285
|
+
|
|
286
|
+
/**
 * Builds the bullet lines of the speech-direction section.
 *
 * Each of language, style, accent, pace and emotion yields one
 * `- <label>: <value>` line, in that order; entries whose value is not a
 * non-blank string are omitted. Labels come from the resolved prompt pack.
 */
function buildSpeechDirectives(config: Pick<CloudVoiceAgentConfig, "metadata">) {
  const pack = getPromptLanguagePack(config);
  const speech = normalizeSpeechConfig(config) ?? {};
  const { labels } = pack;

  const candidates: Array<[string, unknown]> = [
    [labels.language, toLanguageInstruction(speech.language)],
    [labels.style, speech.style],
    [labels.accent, speech.accent],
    [labels.pace, speech.pace],
    [labels.emotion, speech.emotion],
  ];

  const directives: string[] = [];
  for (const [label, value] of candidates) {
    if (typeof value !== "string") continue;
    const trimmed = value.trim();
    if (!trimmed) continue;
    directives.push(`- ${label}: ${trimmed}`);
  }
  return directives;
}
|
|
300
|
+
|
|
301
|
+
/**
 * Builds the humanization ("naturalness") directives for the personality section.
 *
 * Returns an empty list when humanization is disabled. Otherwise it returns
 * the four base directives, followed by at most one filler directive, at most
 * one backchannel directive, and the disfluency directive when it is "rare".
 */
function buildNaturalnessDirectives(config: Pick<CloudVoiceAgentConfig, "humanization" | "metadata">) {
  const pack = getPromptLanguagePack(config);
  const humanization = normalizeCloudVoiceHumanizationConfig(config.humanization);
  if (!humanization.enabled) {
    return [];
  }

  const { naturalness } = pack;
  const { fillerStyle, backchannelFrequency, disfluency } = humanization;

  const fillerLine =
    fillerStyle === "light" ? naturalness.lightFillers
    : fillerStyle === "natural" ? naturalness.naturalFillers
    : undefined;
  const backchannelLine =
    backchannelFrequency === "low" ? naturalness.lowBackchannels
    : backchannelFrequency === "medium" ? naturalness.mediumBackchannels
    : undefined;
  const disfluencyLine = disfluency === "rare" ? naturalness.rareDisfluency : undefined;

  const ordered: Array<string | undefined> = [
    naturalness.liveConversation,
    naturalness.shortSentences,
    naturalness.acknowledge,
    naturalness.noMarkdown,
    fillerLine,
    backchannelLine,
    disfluencyLine,
  ];
  // Only the optional slots can be undefined; drop the ones that were not selected.
  return ordered.filter((line): line is string => line !== undefined);
}
|
|
328
|
+
|
|
329
|
+
/**
 * Returns the tool-speech directives (before, after and on failure of a tool
 * call) from `pack`, or an empty list when `toolCount` is zero or negative.
 */
function buildToolSpeechDirectives(toolCount: number, pack: PromptLanguagePack) {
  if (toolCount <= 0) {
    return [];
  }
  const { tools } = pack;
  return [tools.beforeTool, tools.afterTool, tools.toolFails];
}
|
|
337
|
+
|
|
338
|
+
/**
 * Renders one prompt section: a `## <title>` heading followed by the given
 * lines, one per row.
 *
 * Entries that are not strings, or that are blank after trimming, are
 * skipped; kept entries are emitted untrimmed. Returns an empty string when
 * nothing is left to render.
 */
function section(title: string, lines: Array<string | undefined | null>) {
  const body: string[] = [];
  for (const line of lines) {
    if (typeof line === "string" && line.trim() !== "") {
      body.push(line);
    }
  }
  if (body.length === 0) {
    return "";
  }
  return `## ${title}\n${body.join("\n")}`;
}
|
|
342
|
+
|
|
343
|
+
/**
 * Maps a channel to the flow entry mode it should prefer: "outbound" maps to
 * "outgoing", "phone" maps to "incoming", and every other channel
 * (including `undefined`) maps to "both".
 */
function entryModeForChannel(channel?: CloudVoiceChannel) {
  if (channel === "outbound") {
    return "outgoing";
  }
  if (channel === "phone") {
    return "incoming";
  }
  return "both";
}
|
|
346
|
+
|
|
347
|
+
/**
 * Normalizes a node's entry mode to "incoming", "outgoing" or "both".
 *
 * "inbound" and "outbound" are accepted as synonyms. Matching is exact
 * (case-sensitive); anything else, including non-strings, yields "both".
 */
function normalizeEntryMode(value: unknown) {
  switch (value) {
    case "incoming":
    case "inbound":
      return "incoming";
    case "outgoing":
    case "outbound":
      return "outgoing";
    default:
      return "both";
  }
}
|
|
352
|
+
|
|
353
|
+
/**
 * Looks up the first message configured on the flow graph's start node.
 *
 * The flow graph is read from `config.metadata.flowGraph`. The start node is
 * chosen by this priority:
 *   1. an "initial" node whose entry mode matches the channel's entry mode
 *      (only when the channel maps to "incoming" or "outgoing");
 *   2. an "initial" node whose entry mode normalizes to "both";
 *   3. the node whose id equals `flowGraph.startNodeId`, when that id is set;
 *   4. the first "initial" node.
 *
 * @param config Agent configuration (or a loose record) carrying `metadata`.
 * @param channel Channel of the call; decides the preferred entry mode.
 * @returns The start node's `firstMessage` as extracted by `getString`.
 */
function getConfiguredFirstMessage(config?: Pick<CloudVoiceAgentConfig, "metadata"> | Record<string, unknown>, channel?: CloudVoiceChannel) {
  const metadata = getRecord(config?.metadata);
  const flowGraph = getRecord(metadata.flowGraph);
  const rawNodes: unknown[] = Array.isArray(flowGraph.nodes) ? flowGraph.nodes : [];
  // Wrap the helper so that map's index/array arguments are never forwarded to it.
  const normalizedNodes = rawNodes.map((node) => getRecord(node));
  const initialNodes = normalizedNodes.filter((node) => getString(node.type) === "initial");
  const entryMode = entryModeForChannel(channel);
  const startNodeId = getString(flowGraph.startNodeId);

  const channelSpecific = entryMode !== "both"
    ? initialNodes.find((node) => normalizeEntryMode(node.entryMode) === entryMode)
    : undefined;
  // Without this guard a missing startNodeId would "match" the first node
  // that also lacks an id, because both sides extract to the same empty value.
  const byStartId = startNodeId
    ? normalizedNodes.find((node) => getString(node.id) === startNodeId)
    : undefined;

  const startNode = channelSpecific
    ?? initialNodes.find((node) => normalizeEntryMode(node.entryMode) === "both")
    ?? byStartId
    ?? initialNodes[0];
  return getString(startNode?.firstMessage);
}
|
|
369
|
+
|
|
370
|
+
/**
 * Compiles the full instruction document for a cloud voice agent.
 *
 * The document is a sequence of `## <heading>` sections in the resolved
 * prompt language, separated by blank lines: identity, authored instructions,
 * personality, speech direction, unclear audio, confirmation/readback, tool
 * speech, escalation, phone rules and flow instructions. Sections without
 * content are left out.
 *
 * @param input.config Agent configuration: humanization, metadata and authored instructions.
 * @param input.agentName Accepted for interface compatibility; not used in the output.
 * @param input.channel Channel of the conversation; selects the phone or realtime identity line.
 * @param input.toolCount Number of tools available; tool-speech directives are added when positive.
 * @param input.phoneControlRule Pre-built text for the phone rules section.
 * @param input.compiledFlowInstructions Pre-built text for the flow instructions section.
 * @returns The assembled instruction text.
 */
export function compileCloudVoiceInstructions(input: {
  config: Pick<CloudVoiceAgentConfig, "humanization" | "metadata" | "instructions">;
  agentName?: string;
  channel?: CloudVoiceChannel;
  toolCount?: number;
  phoneControlRule?: string;
  compiledFlowInstructions?: string;
}) {
  const { config } = input;
  const pack = getPromptLanguagePack(config);
  const { conversationProfile: profile } = normalizeCloudVoiceHumanizationConfig(config.humanization);
  const { headings, profile: rules } = pack;

  const identityLine = isPhoneChannel(input.channel) ? pack.identity.phone : pack.identity.realtime;
  const authored = getString(config.instructions, "").trim();
  const speech = normalizeSpeechConfig(config) ?? {};
  const spokenLanguage = toLanguageInstruction(speech.language);

  const personalityLines = [
    rules.personality(profile.personality),
    rules.tone(profile.tone),
    rules.pacing(profile.pacing),
    ...buildNaturalnessDirectives(config),
  ];

  let unclearAudioRule = rules.unclearAskRepeat;
  if (profile.unclearAudio === "confirm_best_guess") {
    unclearAudioRule = rules.unclearConfirmBestGuess;
  }

  let confirmationRule: string;
  switch (profile.confirmation) {
    case "all_actions":
      confirmationRule = rules.confirmAllActions;
      break;
    case "minimal":
      confirmationRule = rules.confirmMinimal;
      break;
    default:
      confirmationRule = rules.confirmCritical;
  }

  let escalationRule: string;
  switch (profile.escalation) {
    case "on_request":
      escalationRule = rules.escalateOnRequest;
      break;
    case "never":
      escalationRule = rules.escalateNever;
      break;
    default:
      escalationRule = rules.escalateWhenBlocked;
  }

  const readbackRule = profile.numberReadback ? rules.readbackEnabled : rules.readbackBrief;

  const blocks = [
    section(headings.identity, [
      identityLine,
      spokenLanguage ? pack.identity.selectedLanguage(spokenLanguage) : undefined,
      pack.identity.noInternalNames,
      pack.identity.publicIdentityOnly,
    ]),
    section(headings.authoredInstructions, [authored]),
    section(headings.personality, personalityLines),
    section(headings.speechDirection, buildSpeechDirectives(config)),
    section(headings.unclearAudio, [unclearAudioRule]),
    section(headings.confirmation, [confirmationRule, readbackRule]),
    section(headings.toolSpeech, buildToolSpeechDirectives(input.toolCount ?? 0, pack)),
    section(headings.escalation, [escalationRule]),
    section(headings.phoneRules, [input.phoneControlRule]),
    section(headings.flowInstructions, [input.compiledFlowInstructions]),
  ];

  // section() returns "" when it has nothing to render; drop those blocks.
  return blocks.filter((block) => block.trim()).join("\n\n");
}
|
|
426
|
+
|
|
427
|
+
/**
 * Builds the call-ending rules for phone channels.
 *
 * @param input.config Agent configuration used to resolve the prompt language.
 * @param input.channel Channel of the call.
 * @returns The localized phone rules, one per line, or an empty string for
 *          channels that are not phone channels.
 */
export function buildPhoneControlRule(input: {
  config?: Pick<CloudVoiceAgentConfig, "metadata"> | Record<string, unknown>;
  channel?: CloudVoiceChannel;
}) {
  if (!isPhoneChannel(input.channel)) {
    return "";
  }
  const { phone } = getPromptLanguagePack(input.config);
  const rules = [
    phone.doNotEndAfterTask,
    phone.askClosingQuestion,
    phone.callHangupOnlyAfterClose,
    phone.sayGoodbyeBeforeHangup,
    phone.doNotRevealTools,
  ];
  return rules.join("\n");
}
|
|
441
|
+
|
|
442
|
+
/**
 * Builds the prompt that makes the agent speak first on a phone call.
 *
 * - Returns `null` when the humanization opening mode is "wait".
 * - When the flow graph configures a first message, the prompt tells the
 *   agent to say exactly that message and then continue the flow.
 * - Otherwise the prompt asks for a short greeting whose style follows the
 *   conversation tone / opening style, in the configured speech language.
 *
 * @param input.config Agent configuration (or a loose record) with humanization and metadata.
 * @param input.agentName Accepted for interface compatibility; not used in the output.
 * @param input.channel Channel of the call; "outbound" swaps the help offer for a right-person check.
 * @returns The opening prompt as a single line, or `null`.
 */
export function buildPhoneOpeningPrompt(input: {
  config?: Pick<CloudVoiceAgentConfig, "humanization" | "metadata"> | Record<string, unknown>;
  agentName?: string;
  channel?: CloudVoiceChannel;
}) {
  const config = getRecord(input.config);
  const pack = getPromptLanguagePack(config);
  const humanization = normalizeCloudVoiceHumanizationConfig(config.humanization);
  if (humanization.openingMode === "wait") {
    return null;
  }

  const { opening } = pack;
  const firstMessage = getConfiguredFirstMessage(config, input.channel);
  if (firstMessage) {
    const scripted = [
      opening.exactFirstMessage(firstMessage),
      opening.continueFlow,
      opening.noInternalNames,
    ];
    return scripted.join(" ");
  }

  const { tone } = humanization.conversationProfile;

  // NOTE(review): the language is read from the raw metadata.speech record
  // here, while pack selection goes through normalizeSpeechConfig — confirm
  // the two always agree.
  const speech = getRecord(getRecord(config.metadata).speech);
  const rawLanguage = speech.language;
  let language = "the selected language";
  if (typeof rawLanguage === "string" && rawLanguage) {
    language = toLanguageInstruction(rawLanguage) ?? rawLanguage;
  }

  // Warm wins over professional; anything else gets the brief style.
  let style = opening.styles.brief;
  if (tone === "empathetic" || humanization.openingStyle === "warm") {
    style = opening.styles.warm;
  } else if (tone === "professional" || tone === "polished" || humanization.openingStyle === "professional") {
    style = opening.styles.professional;
  }

  let channelHint = opening.askHowCanHelp;
  if (input.channel === "outbound") {
    channelHint = opening.outboundRightPerson;
  }

  const greeting = [
    opening.startGreeting(style, language),
    opening.noInternalNames,
    opening.publicIdentityOnly,
    channelHint,
    opening.oneShortSentence,
  ];
  return greeting.join(" ");
}
|
|
482
|
+
|
|
483
|
+
/**
 * Picks the system prompt to hand to the voice provider.
 *
 * Precedence: the voice configuration's `system` text, then the agent's
 * authored `instructions`, then the fallback system prompt of the resolved
 * prompt language. A candidate that is empty or whitespace-only counts as
 * absent, so a blank field never becomes the system prompt. A non-blank
 * candidate is returned unchanged (not trimmed).
 *
 * @param input.voiceConfig Voice configuration that may carry a `system` prompt.
 * @param input.config Agent configuration (or a loose record) with instructions and metadata.
 * @returns The selected system prompt.
 */
export function resolveCloudVoiceProviderSystemPrompt(input: {
  voiceConfig?: { system?: unknown } | null;
  config?: Pick<CloudVoiceAgentConfig, "metadata" | "instructions"> | Record<string, unknown> | null;
}) {
  const voiceSystem = getString(input.voiceConfig?.system, "");
  if (voiceSystem.trim()) return voiceSystem;

  const config = getRecord(input.config);
  const authoredInstructions = getString(config.instructions, "");
  if (authoredInstructions.trim()) return authoredInstructions;

  return getPromptLanguagePack(config).fallbackSystem;
}
|