@kognitivedev/backend-cloud 0.2.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43):
  1. package/.turbo/turbo-build.log +2 -0
  2. package/.turbo/turbo-test.log +14 -0
  3. package/CHANGELOG.md +11 -0
  4. package/README.md +88 -0
  5. package/dist/cloud-voice-parameters.d.ts +11 -0
  6. package/dist/cloud-voice-parameters.js +219 -0
  7. package/dist/cloud-voice-prompt-service.d.ts +24 -0
  8. package/dist/cloud-voice-prompt-service.js +382 -0
  9. package/dist/cloud-voice-runtime-service.d.ts +73 -0
  10. package/dist/cloud-voice-runtime-service.js +443 -0
  11. package/dist/cloud-voice.d.ts +36 -0
  12. package/dist/cloud-voice.js +683 -0
  13. package/dist/index.d.ts +10 -0
  14. package/dist/index.js +26 -0
  15. package/dist/phone-control.d.ts +50 -0
  16. package/dist/phone-control.js +97 -0
  17. package/dist/phone-runtime/audio-playout-tracker.d.ts +51 -0
  18. package/dist/phone-runtime/audio-playout-tracker.js +93 -0
  19. package/dist/phone-runtime/openai-twilio-realtime.d.ts +95 -0
  20. package/dist/phone-runtime/openai-twilio-realtime.js +1074 -0
  21. package/dist/tools.d.ts +2 -0
  22. package/dist/tools.js +216 -0
  23. package/dist/types.d.ts +468 -0
  24. package/dist/types.js +2 -0
  25. package/dist/utils.d.ts +3 -0
  26. package/dist/utils.js +14 -0
  27. package/package.json +47 -0
  28. package/src/__tests__/audio-playout-tracker.test.ts +46 -0
  29. package/src/__tests__/cloud-voice.test.ts +1006 -0
  30. package/src/__tests__/openai-twilio-realtime.test.ts +1193 -0
  31. package/src/__tests__/phone-control.test.ts +105 -0
  32. package/src/cloud-voice-parameters.ts +236 -0
  33. package/src/cloud-voice-prompt-service.ts +493 -0
  34. package/src/cloud-voice-runtime-service.ts +465 -0
  35. package/src/cloud-voice.ts +831 -0
  36. package/src/index.ts +10 -0
  37. package/src/phone-control.ts +156 -0
  38. package/src/phone-runtime/audio-playout-tracker.ts +132 -0
  39. package/src/phone-runtime/openai-twilio-realtime.ts +1250 -0
  40. package/src/tools.ts +227 -0
  41. package/src/types.ts +529 -0
  42. package/src/utils.ts +11 -0
  43. package/tsconfig.json +13 -0
@@ -0,0 +1,831 @@
1
+ import type {
2
+ CloudVoiceAgentConfig,
3
+ CloudVoiceAgentSnapshot,
4
+ CloudVoiceChannel,
5
+ CloudVoiceClientToolManifest,
6
+ CloudVoiceFlowEdge,
7
+ CloudVoiceFlowGraph,
8
+ CloudVoiceFlowNode,
9
+ CloudVoiceFlowNodeOutput,
10
+ CloudVoiceFunctionToolManifest,
11
+ CloudVoiceParameterResolutionResult,
12
+ CloudVoicePhonePrepareSnapshot,
13
+ CloudVoiceSpeechConfig,
14
+ CloudVoiceToolBinding,
15
+ CloudVoiceTransferDestination,
16
+ CloudVoiceTransferMode,
17
+ CompiledCloudVoiceFlowInstructions,
18
+ PreparedCloudVoiceConfig,
19
+ } from "./types";
20
+ import { getRecord, getString } from "./utils";
21
+ import {
22
+ CLOUD_VOICE_CONFIG_VERSION,
23
+ CLOUD_VOICE_PROMPT_COMPILER_VERSION,
24
+ compileCartesiaTtsOptions,
25
+ getCloudVoiceProviderCapabilities,
26
+ normalizeCloudVoiceInputNoiseReduction,
27
+ normalizeCloudVoicePipelineConfig,
28
+ normalizeCloudVoiceTurnDetection,
29
+ normalizeSpeechConfig,
30
+ resolveCloudVoiceSpeechLanguageCode,
31
+ toPreparedTranscription,
32
+ } from "./cloud-voice-runtime-service";
33
+ import { buildPhoneControlRule, compileCloudVoiceInstructions } from "./cloud-voice-prompt-service";
34
+ import {
35
+ renderCloudVoiceParameterObject,
36
+ renderCloudVoiceParameterTemplate,
37
+ } from "./cloud-voice-parameters";
38
+
39
+ export {
40
+ CLOUD_VOICE_CONFIG_VERSION,
41
+ CLOUD_VOICE_PROMPT_COMPILER_VERSION,
42
+ CLOUD_VOICE_PROVIDER_CAPABILITIES,
43
+ DEFAULT_CLOUD_VOICE_HUMANIZATION,
44
+ compileCartesiaTtsOptions,
45
+ getCloudVoiceProviderCapabilities,
46
+ normalizeCloudVoiceConversationProfile,
47
+ normalizeCloudVoiceHumanizationConfig,
48
+ normalizeCloudVoicePipelineConfig,
49
+ normalizeCloudVoiceTurnDetection,
50
+ resolveCloudVoiceSpeechLanguageCode,
51
+ toOpenAITurnDetection,
52
+ toPreparedTranscription,
53
+ } from "./cloud-voice-runtime-service";
54
+ export {
55
+ buildPhoneControlRule,
56
+ buildPhoneOpeningPrompt,
57
+ compileCloudVoiceInstructions,
58
+ resolveCloudVoiceProviderSystemPrompt,
59
+ } from "./cloud-voice-prompt-service";
60
+
61
/** Every channel identifier that `resolveCloudVoiceChannel` accepts. */
export const CLOUD_VOICE_CHANNELS = new Set<CloudVoiceChannel>([
  "web",
  "iframe",
  "script",
  "phone",
  "sip",
  "outbound",
]);

/** Call direction used to choose which `initial` flow node starts a conversation. */
type CloudVoiceGraphEntryMode = "incoming" | "outgoing" | "both";
63
+
64
/**
 * Function-tool manifest that lets the model end the active phone call.
 * The description is assembled from its sentences purely for readability;
 * the resulting string is a single space-separated paragraph.
 */
export const CLOUD_VOICE_PHONE_HANGUP_TOOL: CloudVoiceFunctionToolManifest = {
  type: "function",
  name: "hang_up_call",
  description: [
    "End the active phone call only after the caller clearly asks to end the call, says goodbye, or answers that they do not need anything else after you ask.",
    "Before using this tool, make sure the conversation has a natural closing: ask if there is anything else unless the caller already clearly ended, then say a brief goodbye such as \"bye\", \"see you later\", or \"take care\".",
    "Never use this when the caller asks what tools, abilities, or actions you have access to.",
  ].join(" "),
  parameters: {
    type: "object",
    properties: {
      reason: {
        type: "string",
        description: "Short reason for ending the call.",
      },
    },
    required: ["reason"],
    additionalProperties: false,
  },
};
80
+
81
/**
 * Function-tool manifest that lets the model hand the active call over to a
 * configured transfer destination. Only `destinationId` and `reason` are
 * required; `providerCallId` and `mode` are optional.
 */
export const CLOUD_VOICE_SIP_TRANSFER_TOOL: CloudVoiceFunctionToolManifest = {
  type: "function",
  name: "sip_transfer_call",
  description: [
    "Transfer the active SIP or phone call to a configured destination only when the current voice flow or agent policy allows transfer.",
    "Use this for live SIP/queue/extension/phone-number handoff, not for summarizing information.",
  ].join(" "),
  parameters: {
    type: "object",
    properties: {
      providerCallId: {
        type: "string",
        description: "Provider call id for the active call leg. Omit when the runtime supplies it out of band.",
      },
      destinationId: {
        type: "string",
        description: "Configured transfer destination id.",
      },
      mode: {
        type: "string",
        enum: ["blind", "attended", "warm"],
        description: "Transfer mode requested by the flow policy.",
      },
      reason: {
        type: "string",
        description: "Brief reason for the transfer.",
      },
    },
    required: ["destinationId", "reason"],
    additionalProperties: false,
  },
};
110
+
111
/**
 * Validates a caller-supplied channel value.
 *
 * @param value - Raw channel value; `undefined`, `null` and `""` mean "not specified".
 * @returns `"web"` when unspecified, otherwise the value itself if it is a known channel.
 * @throws Error when the value is anything other than a member of `CLOUD_VOICE_CHANNELS`.
 */
export function resolveCloudVoiceChannel(value: unknown): CloudVoiceChannel {
  const isUnset = value === undefined || value === null || value === "";
  if (isUnset) return "web";

  const isKnown = typeof value === "string" && CLOUD_VOICE_CHANNELS.has(value as CloudVoiceChannel);
  if (!isKnown) {
    throw new Error(`Unsupported cloud voice channel "${String(value)}"`);
  }
  return value as CloudVoiceChannel;
}
118
+
119
/**
 * Tells whether the channel is a telephony channel (`phone`, `sip` or `outbound`).
 *
 * @param channel - Session channel; may be omitted.
 * @returns `true` only for the three telephony channels.
 */
export function shouldInjectPhoneControlTools(channel?: CloudVoiceChannel) {
  switch (channel) {
    case "phone":
    case "sip":
    case "outbound":
      return true;
    default:
      return false;
  }
}
122
+
123
/**
 * Reads `metadata.transferDestinations` and keeps only well-formed entries.
 *
 * An entry is kept when it has a non-blank string `id` and a `destination`
 * that is a real object. `typeof null === "object"`, so `null` (and arrays)
 * are rejected explicitly; otherwise callers that read
 * `destination.destination.type` would throw on a `null` destination.
 *
 * @param config - Agent config (only `metadata` is read).
 * @returns The valid transfer destinations, or an empty array when none are configured.
 */
function getTransferDestinations(config: Pick<CloudVoiceAgentConfig, "metadata">) {
  const destinations = getRecord(config.metadata).transferDestinations;
  if (!Array.isArray(destinations)) return [];

  return destinations.filter((destination): destination is CloudVoiceTransferDestination => {
    const record = getRecord(destination);
    const target = record.destination;
    return typeof record.id === "string"
      && record.id.trim() !== ""
      && typeof target === "object"
      && target !== null
      && !Array.isArray(target);
  });
}
132
+
133
/**
 * Maps a session channel to the flow entry direction.
 *
 * @param channel - Session channel; may be omitted.
 * @returns `"outgoing"` for `outbound`, `"incoming"` for `phone`, `"both"` for everything else.
 */
function entryModeForChannel(channel?: CloudVoiceChannel): CloudVoiceGraphEntryMode {
  if (channel === "outbound") return "outgoing";
  if (channel === "phone") return "incoming";
  return "both";
}
136
+
137
/**
 * Normalizes an authored entry-mode value, accepting the `inbound`/`outbound`
 * aliases. Any unrecognized value is treated as `"both"`.
 *
 * @param value - Raw `entryMode` value from a flow node.
 * @returns The canonical entry mode.
 */
function normalizeEntryMode(value: unknown): CloudVoiceGraphEntryMode {
  switch (value) {
    case "incoming":
    case "inbound":
      return "incoming";
    case "outgoing":
    case "outbound":
      return "outgoing";
    default:
      return "both";
  }
}
142
+
143
/**
 * Decides whether the SIP transfer tool should be offered for this session.
 *
 * @param config - Agent config (only `metadata` is read, via `getTransferDestinations`).
 * @param channel - Session channel; may be omitted.
 * @returns `true` when the channel is `sip`, `phone` or `outbound` AND at least one
 *   configured transfer destination has a type other than `"browser_queue"`.
 */
export function shouldInjectSipControlTools(config: Pick<CloudVoiceAgentConfig, "metadata">, channel?: CloudVoiceChannel) {
  const isCallChannel = channel === "sip" || channel === "phone" || channel === "outbound";
  if (!isCallChannel) return false;

  for (const entry of getTransferDestinations(config)) {
    if (entry.destination.type !== "browser_queue") return true;
  }
  return false;
}
147
+
148
/**
 * Tells whether a prepare snapshot should be persisted for the channel.
 * Currently this is exactly the set of channels that receive phone control tools.
 *
 * @param channel - Session channel.
 * @returns The result of `shouldInjectPhoneControlTools(channel)`.
 */
export function shouldPersistPhonePrepareSnapshot(channel: CloudVoiceChannel) {
  const isPhoneLikeChannel = shouldInjectPhoneControlTools(channel);
  return isPhoneLikeChannel;
}
151
+
152
/**
 * Picks the JSON schema describing a tool's input.
 *
 * Looks at `tool.inputSchema` first and falls back to `tool.config.inputSchema`.
 * If neither is a non-array object, a permissive object schema is returned.
 *
 * @param tool - Tool binding to inspect.
 * @returns The tool's schema object, or `{ type: "object", additionalProperties: true }`.
 */
export function resolveToolInputSchema(tool: CloudVoiceToolBinding) {
  const candidate = tool.inputSchema ?? getRecord(tool.config).inputSchema;
  const isSchemaObject = candidate && typeof candidate === "object" && !Array.isArray(candidate);
  return isSchemaObject
    ? candidate as Record<string, unknown>
    : { type: "object", additionalProperties: true };
}
158
+
159
/** Upper bound on raw graph nodes considered during normalization. */
const MAX_FLOW_NODES = 64;

/** Upper bound on raw graph edges considered during normalization. */
const MAX_FLOW_EDGES = 128;

/** Default character limit applied by `cleanFlowText`. */
const MAX_FLOW_TEXT_LENGTH = 900;

/** Version tag recorded alongside compiled flow instructions. */
export const CLOUD_VOICE_FLOW_INSTRUCTION_COMPILER_VERSION = "voice-flow-paths-v1";
163
+
164
/**
 * Coerces an unknown value to an array.
 *
 * @param value - Any value.
 * @returns The value itself when it is an array, otherwise a new empty array.
 */
function getFlowArray(value: unknown): unknown[] {
  if (!Array.isArray(value)) return [];
  return value;
}
167
+
168
/**
 * Normalizes authored flow text: collapses every whitespace run to one space,
 * trims, and truncates to `maxLength` characters with a trailing `...`.
 *
 * Truncation never leaves a dangling UTF-16 high surrogate in front of the
 * ellipsis (which would yield malformed text when an emoji or other astral
 * character straddles the cut), and limits below 3 no longer produce a
 * negative slice index.
 *
 * @param value - Raw value; converted with `getString(value, "")`
 *   (presumably non-strings become `""` — confirm in `./utils`).
 * @param maxLength - Maximum length in UTF-16 code units, ellipsis included.
 * @returns The cleaned, possibly truncated text.
 */
function cleanFlowText(value: unknown, maxLength = MAX_FLOW_TEXT_LENGTH) {
  const text = getString(value, "").replace(/\s+/g, " ").trim();
  if (text.length <= maxLength) return text;

  let head = text.slice(0, Math.max(0, maxLength - 3));
  // A high surrogate (0xD800-0xDBFF) at the cut means its pair was sliced off.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) head = head.slice(0, -1);
  return `${head}...`;
}
172
+
173
/**
 * Normalizes one authored node output (a named branch leaving a node).
 *
 * @param value - Raw output value from the flow graph.
 * @returns The cleaned output, or `null` when `id` or `label` is empty after cleaning.
 */
function normalizeFlowOutput(value: unknown): CloudVoiceFlowNodeOutput | null {
  const raw = getRecord(value);
  const id = cleanFlowText(raw.id, 96);
  const label = cleanFlowText(raw.label, 140);
  if (!id || !label) return null;

  // Empty strings are stored as `undefined` so optional fields stay absent.
  const optionalText = (input: unknown, limit: number) => cleanFlowText(input, limit) || undefined;

  return {
    id,
    label,
    description: optionalText(raw.description, 360),
    desc: optionalText(raw.desc, 360),
    condition: optionalText(raw.condition, 360),
    icon: optionalText(raw.icon, 96),
    metadata: getRecord(raw.metadata),
  };
}
188
+
189
/**
 * Normalizes one authored flow node.
 *
 * - `id` is required; nodes without one are dropped (`null`).
 * - `type` defaults to `"case"`.
 * - Text fields are cleaned and length-limited; empty values become `undefined`.
 * - `maxDuration` is rounded when it is a finite number, otherwise kept as cleaned text.
 * - For `sub_agent` nodes, `subAgentToolId` / `subAgentSlug` are merged into
 *   `metadata.subAgent` as `toolId` / `agentSlug`.
 *
 * @param value - Raw node value from the flow graph.
 * @returns The normalized node, or `null` when it has no usable id.
 */
function normalizeFlowNode(value: unknown): CloudVoiceFlowNode | null {
  const raw = getRecord(value);
  const id = cleanFlowText(raw.id, 96);
  const type = cleanFlowText(raw.type, 64) || "case";
  if (!id) return null;

  const optionalText = (input: unknown, limit?: number) => cleanFlowText(input, limit) || undefined;
  const cleanIdList = (input: unknown) =>
    getFlowArray(input)
      .map((entry) => cleanFlowText(entry, 96))
      .filter(Boolean);

  const toolIds = cleanIdList(raw.toolIds);
  const phoneNumberIds = cleanIdList(raw.phoneNumberIds);

  const hasNumericDuration = typeof raw.maxDuration === "number" && Number.isFinite(raw.maxDuration);
  const maxDuration = hasNumericDuration
    ? Math.round(raw.maxDuration as number)
    : optionalText(raw.maxDuration, 32);

  const isKnownTransferMode = raw.transferMode === "attended"
    || raw.transferMode === "warm"
    || raw.transferMode === "blind";
  const transferMode = isKnownTransferMode ? raw.transferMode as CloudVoiceTransferMode : undefined;

  const metadata = getRecord(raw.metadata);
  let nodeMetadata = metadata;
  if (type === "sub_agent") {
    const subAgentToolId = getString(raw.subAgentToolId, "");
    const subAgentSlug = getString(raw.subAgentSlug, "");
    const subAgent = {
      ...getRecord(metadata.subAgent),
      ...(subAgentToolId ? { toolId: subAgentToolId } : {}),
      ...(subAgentSlug ? { agentSlug: subAgentSlug } : {}),
    };
    nodeMetadata = { ...metadata, subAgent };
  }

  const outputs = getFlowArray(raw.outputs)
    .map((entry) => normalizeFlowOutput(entry))
    .filter((output): output is CloudVoiceFlowNodeOutput => Boolean(output));

  return {
    id,
    type,
    entryMode: normalizeEntryMode(raw.entryMode),
    title: optionalText(raw.title, 160),
    prompt: optionalText(raw.prompt),
    firstMessage: optionalText(raw.firstMessage, 360),
    language: optionalText(raw.language, 32),
    maxDuration,
    outputs,
    toolId: optionalText(raw.toolId, 96),
    toolIds,
    phoneNumberIds,
    transferTarget: optionalText(raw.transferTarget, 160),
    transferDestinationId: optionalText(raw.transferDestinationId, 96),
    transferMode,
    metadata: nodeMetadata,
  };
}
233
+
234
/**
 * Normalizes one authored flow edge.
 *
 * @param value - Raw edge value from the flow graph.
 * @returns The cleaned edge, or `null` when `from` or `to` is empty after cleaning.
 *   `fromPort` is `null` when it is absent, `null`, or empty.
 */
function normalizeFlowEdge(value: unknown): CloudVoiceFlowEdge | null {
  const raw = getRecord(value);
  const from = cleanFlowText(raw.from, 96);
  const to = cleanFlowText(raw.to, 96);
  if (!from || !to) return null;

  let fromPort: string | null = null;
  if (raw.fromPort !== null) {
    fromPort = cleanFlowText(raw.fromPort, 96) || null;
  }

  const edgeId = cleanFlowText(raw.id, 96) || undefined;
  return { id: edgeId, from, fromPort, to };
}
247
+
248
/**
 * Normalizes an authored flow graph.
 *
 * Only the first `MAX_FLOW_NODES` raw nodes and `MAX_FLOW_EDGES` raw edges are
 * considered (the cap is applied before invalid entries are dropped). Edges
 * whose endpoints are not surviving nodes are discarded, and `startNodeId` is
 * kept only when it names a surviving node.
 *
 * @param value - Raw graph value (typically `metadata.flowGraph`).
 * @returns The normalized graph, or `null` when no valid node remains.
 */
function normalizeFlowGraph(value: unknown): CloudVoiceFlowGraph | null {
  const raw = getRecord(value);

  const nodes: CloudVoiceFlowNode[] = [];
  for (const candidate of getFlowArray(raw.nodes).slice(0, MAX_FLOW_NODES)) {
    const node = normalizeFlowNode(candidate);
    if (node) nodes.push(node);
  }
  if (nodes.length === 0) return null;

  const knownNodeIds = new Set(nodes.map((node) => node.id));

  const edges: CloudVoiceFlowEdge[] = [];
  for (const candidate of getFlowArray(raw.edges).slice(0, MAX_FLOW_EDGES)) {
    const edge = normalizeFlowEdge(candidate);
    if (edge && knownNodeIds.has(edge.from) && knownNodeIds.has(edge.to)) edges.push(edge);
  }

  const requestedStart = cleanFlowText(raw.startNodeId, 96);
  const startNodeId = requestedStart && knownNodeIds.has(requestedStart) ? requestedStart : undefined;

  return {
    version: raw.version === 1 ? 1 : undefined,
    startNodeId,
    nodes,
    edges,
    metadata: getRecord(raw.metadata),
  };
}
269
+
270
/**
 * Chooses the node a conversation starts from.
 *
 * Priority:
 * 1. an `initial` node whose entry mode equals the requested direction
 *    (skipped when the requested mode is `"both"`);
 * 2. an `initial` node whose entry mode is `"both"`;
 * 3. the node named by `graph.startNodeId`, then the first `initial` node,
 *    then the first node of the graph.
 *
 * @param graph - Normalized flow graph.
 * @param options - Optional requested entry mode (defaults to `"both"`).
 * @returns The start node, or `null` when the graph has no nodes.
 */
function resolveFlowStartNode(graph: CloudVoiceFlowGraph, options: { entryMode?: CloudVoiceGraphEntryMode } = {}) {
  const requestedMode = options.entryMode ?? "both";
  const initialNodes = graph.nodes.filter((node) => node.type === "initial");

  if (requestedMode !== "both") {
    const directional = initialNodes.find((node) => normalizeEntryMode(node.entryMode) === requestedMode);
    if (directional) return directional;
  }

  const bidirectional = initialNodes.find((node) => normalizeEntryMode(node.entryMode) === "both");
  if (bidirectional) return bidirectional;

  return graph.nodes.find((node) => node.id === graph.startNodeId)
    ?? initialNodes[0]
    ?? graph.nodes[0]
    ?? null;
}
283
+
284
/**
 * Returns the text that explains when an output applies.
 *
 * The first of `condition`, `description`, `desc` that is neither `null` nor
 * `undefined` wins (an empty string counts as present).
 *
 * @param output - Flow node output.
 * @returns The chosen text, or `""` when all three fields are missing.
 */
function outputDescription(output: CloudVoiceFlowNodeOutput) {
  if (output.condition != null) return output.condition;
  if (output.description != null) return output.description;
  if (output.desc != null) return output.desc;
  return "";
}
287
+
288
/**
 * Builds a human-readable name for the node an edge points to.
 *
 * @param edge - Edge whose target should be named.
 * @param nodesById - Lookup of nodes by id.
 * @returns `"<title or id> (<type>)"` when the target exists, otherwise the raw target id.
 */
function edgeTargetTitle(edge: CloudVoiceFlowEdge, nodesById: Map<string, CloudVoiceFlowNode>) {
  const target = nodesById.get(edge.to);
  if (!target) return edge.to;
  const displayName = target.title ?? target.id;
  return `${displayName} (${target.type})`;
}
292
+
293
/**
 * Collects every tool id a node refers to, from all the places authors may
 * have stored it: `toolId`, `toolIds`, a raw `subAgentToolId` property,
 * `metadata.subAgent.toolId` / `.subAgentToolId`, and `metadata.toolId` /
 * `metadata.toolName`.
 *
 * Ids are cleaned (max 96 chars), empty values are dropped, and duplicates are
 * removed while keeping first-seen order. Without de-duplication, a tool that
 * is named in more than one field would be announced once per field in the
 * compiled route steps.
 *
 * @param node - Flow node to inspect.
 * @returns A fresh array of unique, non-empty tool ids (callers may append to it).
 */
function collectNodeToolIds(node: CloudVoiceFlowNode) {
  const metadata = getRecord(node.metadata);
  const subAgent = getRecord(metadata.subAgent);
  const candidates: unknown[] = [
    node.toolId,
    ...(node.toolIds ?? []),
    getString((node as unknown as Record<string, unknown>).subAgentToolId, ""),
    getString(subAgent.toolId, ""),
    getString(subAgent.subAgentToolId, ""),
    getString(metadata.toolId, ""),
    getString(metadata.toolName, ""),
  ];

  const uniqueIds = new Set<string>();
  for (const candidate of candidates) {
    const toolId = cleanFlowText(candidate, 96);
    if (toolId) uniqueIds.add(toolId);
  }
  return [...uniqueIds];
}
306
+
307
/**
 * Converts a tool binding into a function-tool manifest.
 *
 * @param tool - Tool binding.
 * @returns A manifest named after `tool.id`, described by `tool.description`
 *   (falling back to `tool.name`), with parameters from `resolveToolInputSchema`.
 */
function toolToManifest(tool: CloudVoiceToolBinding): CloudVoiceFunctionToolManifest {
  const description = tool.description ?? tool.name;
  const parameters = resolveToolInputSchema(tool);
  return { type: "function", name: tool.id, description, parameters };
}
315
+
316
/**
 * Serializes a value to a JSON-like string with object keys sorted, so that
 * equal structures always produce the same text regardless of key order.
 *
 * Note: this is a signature format, not strict JSON. `undefined` object values
 * are rendered as the text `undefined`, and `undefined` array items render as
 * empty slots. That behavior is intentional to keep existing signatures stable.
 *
 * @param value - Any value.
 * @returns The canonical string form.
 */
function stableJson(value: unknown): string {
  if (Array.isArray(value)) {
    const items = value.map((item) => stableJson(item));
    return `[${items.join(",")}]`;
  }

  if (value && typeof value === "object") {
    const record = value as Record<string, unknown>;
    const members: string[] = [];
    for (const key of Object.keys(record).sort()) {
      members.push(`${JSON.stringify(key)}:${stableJson(record[key])}`);
    }
    return `{${members.join(",")}}`;
  }

  return JSON.stringify(value);
}
324
+
325
/**
 * Hashes a string with 32-bit FNV-1a over its UTF-16 code units.
 *
 * @param value - Text to hash.
 * @returns The unsigned 32-bit hash rendered in base 36.
 */
function hashText(value: string) {
  let accumulator = 0x811c9dc5; // FNV offset basis (2166136261)
  let position = 0;
  while (position < value.length) {
    // XOR in the code unit, then multiply by the FNV prime (16777619) in 32-bit space.
    accumulator = Math.imul(accumulator ^ value.charCodeAt(position), 0x01000193);
    position += 1;
  }
  return (accumulator >>> 0).toString(36);
}
333
+
334
/**
 * Computes a short fingerprint of a normalized flow graph.
 *
 * @param graph - Normalized flow graph.
 * @returns `hashText` of the graph's `stableJson` form.
 */
function graphSignature(graph: CloudVoiceFlowGraph) {
  const canonicalText = stableJson(graph);
  return hashText(canonicalText);
}
337
+
338
/**
 * Formats a tool reference for use inside compiled instructions.
 *
 * @param toolId - Tool id referenced by the flow.
 * @param toolsById - Lookup of configured tools by id.
 * @returns ``<name or id> (`<id>`)`` for a configured tool, otherwise just the
 *   back-tick quoted id.
 */
function toolLabel(toolId: string, toolsById: Map<string, CloudVoiceToolBinding>) {
  const tool = toolsById.get(toolId);
  if (!tool) return `\`${toolId}\``;
  const displayName = tool.name ?? tool.id;
  return `${displayName} (\`${tool.id}\`)`;
}
342
+
343
/**
 * Lists the required input names declared by a tool's schema.
 *
 * @param tool - Tool binding, or `undefined` when the tool is not configured.
 * @returns The non-blank string entries of `schema.required` joined by `", "`,
 *   or `""` when there is no tool or no required inputs.
 */
function toolRequiredInputs(tool: CloudVoiceToolBinding | undefined) {
  if (!tool) return "";
  const schema = getRecord(resolveToolInputSchema(tool));
  if (!Array.isArray(schema.required)) return "";

  const names = schema.required.filter(
    (item): item is string => typeof item === "string" && item.trim().length > 0,
  );
  // Joining an empty list already yields "".
  return names.join(", ");
}
351
+
352
/**
 * Looks up the edges that leave a node.
 *
 * @param nodeId - Source node id.
 * @param edgesBySource - Edges grouped by their `from` node id.
 * @returns The stored edge list, or a new empty array when the node has none.
 */
function outgoingEdges(nodeId: string, edgesBySource: Map<string, CloudVoiceFlowEdge[]>) {
  const edges = edgesBySource.get(nodeId);
  return edges == null ? [] : edges;
}
355
+
356
/**
 * Finds the node output an edge is attached to.
 *
 * @param node - Source node of the edge (may be `undefined`).
 * @param edge - Edge whose `fromPort` names the output id.
 * @returns The first output whose id equals `edge.fromPort`, or `null` when the
 *   node is missing, has no outputs, or nothing matches.
 */
function outputForEdge(node: CloudVoiceFlowNode | undefined, edge: CloudVoiceFlowEdge) {
  const outputs = node?.outputs;
  if (!outputs) return null;
  for (const output of outputs) {
    if (output.id === edge.fromPort) return output;
  }
  return null;
}
359
+
360
/**
 * Follows a single route through the flow graph and turns it into ordered,
 * human-readable instruction steps.
 *
 * Starting at `fromNodeId`, each visited node contributes its prompt (or a
 * generic "handle this case" line), then one step per referenced tool. The
 * walk then continues along the node's default edge: the first edge without a
 * `fromPort`, or otherwise the first edge. It stops at `end` / `transfer`
 * nodes and at nodes with no outgoing edge. When the loop ends for any other
 * reason (the next node was already visited, or could not be found), a
 * loop-guard sentence is appended.
 *
 * Side effect: every tool id encountered is added to `input.referencedToolIds`.
 *
 * @param input - Walk context: start node id, node/edge/tool lookups, the shared
 *   referenced-tool set, and optionally node ids to treat as already visited.
 * @returns The steps, the tool ids seen on this path (may contain repeats across
 *   nodes), and the node the walk stopped at (`null` if the start id is unknown).
 */
function walkFlowPath(input: {
  fromNodeId: string;
  nodesById: Map<string, CloudVoiceFlowNode>;
  edgesBySource: Map<string, CloudVoiceFlowEdge[]>;
  toolsById: Map<string, CloudVoiceToolBinding>;
  referencedToolIds: Set<string>;
  visited?: Set<string>;
}): {
  steps: string[];
  toolIds: string[];
  terminal: CloudVoiceFlowNode | null;
} {
  const steps: string[] = [];
  const toolIds: string[] = [];
  // Copy so the caller's set is never mutated.
  const seen = new Set(input.visited ?? []);
  let node = input.nodesById.get(input.fromNodeId) ?? null;

  while (node && !seen.has(node.id)) {
    seen.add(node.id);
    const title = node.title ?? node.id;

    if (node.type !== "initial") {
      if (node.prompt) {
        steps.push(`At "${title}", ${node.prompt}`);
      } else if (node.type === "case") {
        steps.push(`Handle "${title}" as the active case.`);
      }
    }

    const nodeToolIds = collectNodeToolIds(node);
    if (node.type === "tool" && nodeToolIds.length === 0 && node.title) {
      // A tool node without an explicit id: try to match its title to a configured tool id.
      const wantedTitle = node.title;
      const inferred = [...input.toolsById.keys()].find(
        (toolId) => toolId === wantedTitle || toolId.replace(/[_-]/g, " ").toLowerCase() === wantedTitle.toLowerCase(),
      );
      if (inferred) nodeToolIds.push(inferred);
    }

    for (const toolId of nodeToolIds) {
      input.referencedToolIds.add(toolId);
      toolIds.push(toolId);

      const requiredInputs = toolRequiredInputs(input.toolsById.get(toolId));
      const label = toolLabel(toolId, input.toolsById);
      const collectSuffix = requiredInputs ? ` after collecting: ${requiredInputs}` : "";

      if (node.type === "sub_agent") {
        steps.push(`Call sub-agent tool ${label}${collectSuffix}. Wait for its final result, read the returned \`outcome\`, and continue through the matching outcome branch.`);
        if (node.outputs && node.outputs.length > 0) {
          const branches = node.outputs.map((output) => {
            const detail = outputDescription(output);
            return `${output.label}${detail ? ` (${detail})` : ""}`;
          });
          steps.push(`Sub-agent outcome branches: ${branches.join("; ")}.`);
        }
      } else {
        steps.push(`Use ${label}${collectSuffix}.`);
      }
    }

    if (node.type === "end" || node.type === "transfer") {
      return { steps, toolIds, terminal: node };
    }

    const edges = outgoingEdges(node.id, input.edgesBySource);
    const nextEdge = edges.find((edge) => !edge.fromPort) ?? edges[0] ?? null;
    if (!nextEdge) return { steps, toolIds, terminal: node };

    const output = outputForEdge(node, nextEdge);
    if (output) {
      const condition = outputDescription(output);
      steps.push(`If "${output.label}"${condition ? ` (${condition})` : ""} matches inside this step, continue.`);
    }
    node = input.nodesById.get(nextEdge.to) ?? null;
  }

  steps.push("Stop following this route if it loops back to a node already visited.");
  return { steps, toolIds, terminal: node };
}
427
+
428
/**
 * Produces the closing sentence for the node a route stopped at.
 *
 * @param node - Terminal node of a route, or `null`.
 * @returns An "end" sentence for `end` nodes, a "transfer/escalate" sentence for
 *   `transfer` nodes (target is `transferTarget`, else `transferDestinationId`,
 *   else the node title/id), and `""` for anything else.
 */
function terminalInstruction(node: CloudVoiceFlowNode | null) {
  if (!node) return "";
  const title = node.title ?? node.id;

  switch (node.type) {
    case "end":
      return node.prompt
        ? `Then end with: ${node.prompt}`
        : `Then end at "${title}" with a brief polite closing.`;
    case "transfer": {
      const target = node.transferTarget || node.transferDestinationId || title;
      return node.prompt
        ? `Then transfer/escalate to ${target}: ${node.prompt}`
        : `Then transfer/escalate to ${target} with a concise handoff summary.`;
    }
    default:
      return "";
  }
}
440
+
441
/**
 * Compiles an authored flow graph into plain-text agent instructions.
 *
 * The output starts with the start node's prompt (or a generic default),
 * followed by fixed routing rules, optional opening/language/duration rules
 * taken from the start node, one numbered "route" per edge leaving the start
 * node (each expanded with `walkFlowPath`), general tool/closing rules, and a
 * note about referenced tools that are not configured.
 *
 * When the start node declares outputs, only edges attached to one of those
 * outputs become routes (in output order); otherwise every outgoing edge does.
 *
 * @param graphInput - Raw flow graph (typically `metadata.flowGraph`).
 * @param options - Configured tools, and either an explicit `entryMode` or a
 *   `channel` from which the entry mode is derived.
 * @returns The compiled instructions plus bookkeeping (signature, counts,
 *   referenced/missing tools, tool manifests, warnings), or `null` when the
 *   graph has no valid nodes.
 */
export function compileVoiceGraphToInstructions(
  graphInput: unknown,
  options: { tools?: CloudVoiceToolBinding[]; entryMode?: CloudVoiceGraphEntryMode; channel?: CloudVoiceChannel } = {},
): CompiledCloudVoiceFlowInstructions | null {
  const graph = normalizeFlowGraph(graphInput);
  if (!graph) return null;

  const entryMode = options.entryMode ?? entryModeForChannel(options.channel);
  const start = resolveFlowStartNode(graph, { entryMode });

  // Lookups; on duplicate ids the later entry wins, as with `new Map(entries)`.
  const nodesById = new Map<string, CloudVoiceFlowNode>();
  for (const node of graph.nodes) nodesById.set(node.id, node);

  const edgesBySource = new Map<string, CloudVoiceFlowEdge[]>();
  for (const edge of graph.edges) {
    const bucket = edgesBySource.get(edge.from);
    if (bucket) {
      bucket.push(edge);
    } else {
      edgesBySource.set(edge.from, [edge]);
    }
  }

  const toolsById = new Map<string, CloudVoiceToolBinding>();
  for (const tool of options.tools ?? []) toolsById.set(tool.id, tool);

  const referencedToolIds = new Set<string>();
  const warnings: string[] = [];
  const signature = graphSignature(graph);

  const lines = [
    start?.prompt || "You are a realtime voice agent. Help the caller clearly and concisely.",
    "",
    "Use this conversation flow as the source of truth for routing and tool usage.",
    "Do not reveal internal node IDs, graph structure, or implementation details to the caller.",
    "Choose the first route that clearly matches the caller's intent. If no route clearly matches, ask one concise clarifying question.",
  ];

  if (start?.firstMessage) {
    lines.push(`Opening rule: if you speak first, say exactly this as your first sentence before any other greeting or introduction: ${start.firstMessage}`);
    lines.push("Do not prepend, replace, or append an internal agent name, agent number, slug, node name, or workflow name to that first sentence.");
  }
  if (start?.language) {
    lines.push(`Use this language guidance when possible: ${start.language}.`);
  }
  if (start?.maxDuration !== undefined) {
    lines.push(`Keep the call within ${String(start.maxDuration)} minutes unless the caller needs a short final clarification.`);
  }

  const startEdges = start ? outgoingEdges(start.id, edgesBySource) : [];
  const startHasOutputs = Boolean(start && start.outputs && start.outputs.length > 0);
  const routeEdges = startHasOutputs
    ? (start?.outputs ?? []).flatMap((output) =>
      startEdges
        .filter((edge) => edge.fromPort === output.id)
        .map((edge) => ({ output: output as CloudVoiceFlowNodeOutput | null, edge })))
    : startEdges.map((edge) => ({ output: outputForEdge(start ?? undefined, edge), edge }));

  if (routeEdges.length > 0) {
    lines.push("", "Routes:");
  }

  routeEdges.forEach((route, index) => {
    const { output, edge } = route;
    const condition = output ? outputDescription(output) : "";
    const routeName = output?.label ?? edgeTargetTitle(edge, nodesById);
    const path = walkFlowPath({
      fromNodeId: edge.to,
      nodesById,
      edgesBySource,
      toolsById,
      referencedToolIds,
      visited: start ? new Set([start.id]) : undefined,
    });

    lines.push("", `${index + 1}. When ${condition || routeName}:`);
    if (output?.label && condition) {
      lines.push(` Treat this as the "${output.label}" route.`);
    }

    if (path.steps.length === 0) {
      lines.push(` Continue to "${routeName}" and handle it according to its instruction.`);
    } else {
      path.steps.forEach((step, stepIndex) => {
        lines.push(` ${stepIndex + 1}. ${step}`);
      });
    }

    const uniqueToolIds = [...new Set(path.toolIds)];
    if (uniqueToolIds.length > 0) {
      const labels = uniqueToolIds.map((toolId) => toolLabel(toolId, toolsById));
      lines.push(` Use tools in this route: ${labels.join(", ")}.`);
      lines.push(" Never invent tool results. If a required tool is unavailable or returns no result, explain that limitation and ask for the next best detail.");
    }

    const closing = terminalInstruction(path.terminal);
    if (closing) lines.push(` ${closing}`);
  });

  if (routeEdges.length === 0 && start) {
    lines.push("", "No explicit routes are connected. Follow the start node instructions, answer concisely, and ask one clarifying question when intent is unclear.");
  }

  lines.push(
    "",
    "General tool and closing rules:",
    "- Use tools only when the matching route requires them or the caller provides enough context for that route.",
    "- Collect required tool inputs before calling a tool.",
    "- Summarize tool results in natural spoken language; do not read raw JSON or internal IDs unless they are useful to the caller.",
    "- Before ending, ask whether the caller needs anything else unless the caller already clearly ended the conversation.",
  );

  const missingToolIds = [...referencedToolIds].filter((toolId) => !toolsById.has(toolId));
  if (missingToolIds.length > 0) {
    const missingList = missingToolIds.join(", ");
    warnings.push(`Flow graph references tools that are not configured on this agent: ${missingList}`);
    lines.push("", `Unavailable graph tools: ${missingList}. If these are needed, explain that the action is unavailable instead of inventing results.`);
  }

  // Manifests only for referenced tools that are actually configured.
  const toolManifest: CloudVoiceFunctionToolManifest[] = [];
  for (const toolId of referencedToolIds) {
    const tool = toolsById.get(toolId);
    if (tool) toolManifest.push(toolToManifest(tool));
  }

  return {
    instructions: lines.join("\n"),
    compilerVersion: CLOUD_VOICE_FLOW_INSTRUCTION_COMPILER_VERSION,
    graphSignature: signature,
    startNodeId: start?.id ?? null,
    entryMode,
    nodeCount: graph.nodes.length,
    edgeCount: graph.edges.length,
    referencedToolIds: [...referencedToolIds],
    missingToolIds,
    toolManifest,
    warnings,
  };
}
555
+
556
/**
 * Bakes the flow graph stored in `config.metadata.flowGraph` into the config.
 *
 * When the graph compiles, the returned config carries the start node's runtime
 * overrides (see `applyFlowStartRuntimeConfig`), the compiled instructions, and
 * a `metadata.flowGraphInstructionCompiler` marker with version, graph
 * signature, entry mode and the current timestamp.
 *
 * @param config - Agent config to compile.
 * @returns A new config, or the original `config` unchanged when there is no valid graph.
 */
export function compileCloudVoiceGraphConfig(config: CloudVoiceAgentConfig): CloudVoiceAgentConfig {
  const flowGraph = getRecord(config.metadata).flowGraph;
  const compiled = compileVoiceGraphToInstructions(flowGraph, { tools: config.tools });
  if (!compiled) return config;

  const base = applyFlowStartRuntimeConfig(config);
  const compilerMarker = {
    version: compiled.compilerVersion,
    graphSignature: compiled.graphSignature,
    entryMode: compiled.entryMode,
    compiledAt: new Date().toISOString(),
  };

  return {
    ...base,
    instructions: compiled.instructions,
    metadata: {
      ...getRecord(base.metadata),
      flowGraphInstructionCompiler: compilerMarker,
    },
  };
}
574
+
575
/**
 * Finds the raw (un-normalized) start node of the configured flow graph.
 *
 * Uses the same priority as `resolveFlowStartNode`: a direction-matching
 * `initial` node, then an `initial` node for `"both"`, then the node named by
 * `startNodeId`, the first `initial` node, and finally the first node.
 * NOTE(review): unlike the normalized path, node ids are not cleaned or capped
 * here, so the two lookups could disagree on unusual graphs — confirm intended.
 *
 * @param config - Agent config (only `metadata.flowGraph` is read).
 * @param options - Optional requested entry mode (defaults to `"both"`).
 * @returns The raw start node record, or `null` when the graph has no nodes.
 */
function getFlowStartNode(config: Pick<CloudVoiceAgentConfig, "metadata">, options: { entryMode?: CloudVoiceGraphEntryMode } = {}) {
  const graph = getRecord(getRecord(config.metadata).flowGraph);
  const nodes = Array.isArray(graph.nodes) ? graph.nodes.map(getRecord) : [];
  if (nodes.length === 0) return null;

  const requestedMode = options.entryMode ?? "both";
  const initialNodes = nodes.filter((node) => getString(node.type) === "initial");

  if (requestedMode !== "both") {
    const directional = initialNodes.find((node) => normalizeEntryMode(node.entryMode) === requestedMode);
    if (directional) return directional;
  }

  const bidirectional = initialNodes.find((node) => normalizeEntryMode(node.entryMode) === "both");
  if (bidirectional) return bidirectional;

  const startNodeId = getString(graph.startNodeId);
  const byStartId = nodes.find((node) => getString(node.id) === startNodeId);
  return byStartId ?? initialNodes[0] ?? nodes[0] ?? null;
}
594
+
595
/**
 * Type guard for the provider identifiers this module recognizes.
 *
 * @param value - Candidate provider string.
 * @returns `true` for `openai-realtime`, `gemini-live`, `kognitive-voice` and `xai-realtime`.
 */
function isCloudVoiceProvider(value: string): value is CloudVoiceAgentConfig["provider"] {
  switch (value) {
    case "openai-realtime":
    case "gemini-live":
    case "kognitive-voice":
    case "xai-realtime":
      return true;
    default:
      return false;
  }
}
601
+
602
/**
 * Type guard for the transport identifiers this module recognizes.
 *
 * @param value - Candidate transport string.
 * @returns `true` for `webrtc` and `websocket`.
 */
function isCloudVoiceTransport(value: string): value is CloudVoiceAgentConfig["transport"] {
  switch (value) {
    case "webrtc":
    case "websocket":
      return true;
    default:
      return false;
  }
}
605
+
606
/**
 * Overlays runtime settings authored on the flow's start node onto the config.
 *
 * Each setting is applied only when the start node carries a usable value:
 * - `provider` / `transport`: must be a recognized identifier;
 * - `model` / `voice`: must be a non-empty string;
 * - `providerOptions` / `humanization`: must be a non-array object;
 * - `transcription` / `turnDetection` / `inputNoiseReduction`: a non-array
 *   object, or an explicit `null`;
 * - `speech`: a non-array object, stored under `metadata.speech`.
 *
 * @param config - Base agent config.
 * @param options - Optional entry mode used to pick the start node.
 * @returns A new config with overrides applied, or `config` itself when the
 *   graph has no start node.
 */
function applyFlowStartRuntimeConfig(config: CloudVoiceAgentConfig, options: { entryMode?: CloudVoiceGraphEntryMode } = {}): CloudVoiceAgentConfig {
  const start = getFlowStartNode(config, options);
  if (!start) return config;

  const isPlainObject = (candidate: unknown): boolean =>
    Boolean(candidate) && typeof candidate === "object" && !Array.isArray(candidate);
  const isNullOrPlainObject = (candidate: unknown): boolean =>
    candidate === null || isPlainObject(candidate);

  const provider = getString(start.provider);
  const transport = getString(start.transport);
  const model = getString(start.model);
  const voice = getString(start.voice);

  return {
    ...config,
    ...(isCloudVoiceProvider(provider) ? { provider } : {}),
    ...(model ? { model } : {}),
    ...(voice ? { voice } : {}),
    ...(isCloudVoiceTransport(transport) ? { transport } : {}),
    ...(isPlainObject(start.providerOptions)
      ? { providerOptions: start.providerOptions as Record<string, unknown> }
      : {}),
    ...(isNullOrPlainObject(start.transcription)
      ? { transcription: start.transcription as Record<string, unknown> | null }
      : {}),
    ...(isNullOrPlainObject(start.turnDetection)
      ? { turnDetection: start.turnDetection as Record<string, unknown> | null }
      : {}),
    ...(isNullOrPlainObject(start.inputNoiseReduction)
      ? { inputNoiseReduction: start.inputNoiseReduction as Record<string, unknown> | null }
      : {}),
    ...(isPlainObject(start.humanization)
      ? { humanization: start.humanization as CloudVoiceAgentConfig["humanization"] }
      : {}),
    metadata: {
      ...getRecord(config.metadata),
      ...(isPlainObject(start.speech)
        ? { speech: start.speech as CloudVoiceSpeechConfig }
        : {}),
    },
  };
}
640
+
641
/**
 * Validates client-declared tool manifests.
 *
 * Entries are kept when they are non-array objects whose `id` is 1-64
 * characters of `[A-Za-z0-9_-]`. `name` defaults to the id, `description`
 * defaults to the name, and a missing/invalid `inputSchema` becomes a
 * permissive object schema. At most 32 tools are returned.
 *
 * The scan stops as soon as the cap is reached, so an arbitrarily large
 * client-supplied array is not fully normalized just to be truncated; the
 * result is the same first-32-valid-entries list as before.
 *
 * @param value - Raw client tool list (any value; non-arrays yield `[]`).
 * @returns Up to 32 normalized manifests, in input order.
 */
export function normalizeClientToolManifests(value: unknown): CloudVoiceClientToolManifest[] {
  if (!Array.isArray(value)) return [];

  const maxClientTools = 32;
  const toolIdPattern = /^[A-Za-z0-9_-]{1,64}$/;
  const tools: CloudVoiceClientToolManifest[] = [];

  for (const item of value) {
    if (tools.length >= maxClientTools) break;
    if (!item || typeof item !== "object" || Array.isArray(item)) continue;

    const record = item as Record<string, unknown>;
    const id = getString(record.id, "");
    if (!id || !toolIdPattern.test(id)) continue;

    const inputSchema = record.inputSchema && typeof record.inputSchema === "object" && !Array.isArray(record.inputSchema)
      ? record.inputSchema as Record<string, unknown>
      : { type: "object", additionalProperties: true };

    tools.push({
      id,
      name: getString(record.name, id),
      description: getString(record.description, getString(record.name, id)),
      inputSchema,
    });
  }

  return tools;
}
661
+
662
/**
 * Produces the prepared, session-ready configuration for a cloud voice agent.
 *
 * In order, this:
 * 1. Applies the flow-start runtime config for the channel's entry mode and
 *    renders `parameters` into the resulting config.
 * 2. Compiles the flow graph found in the rendered metadata and decides whether
 *    the compiled instructions replace the authored instructions, are appended
 *    to the prompt, or are left out.
 * 3. Derives speech, language, turn detection, provider capabilities and, for
 *    the "kognitive-voice" provider only, the pipeline and Cartesia TTS options.
 * 4. Reads `maxOutputTokens` (falling back to `maxTokens`, then "inf") and a
 *    temperature clamped to [0, 2] from the provider options.
 * 5. Builds the tool manifest: config tools, then client tools (a later entry
 *    with the same id replaces the earlier one), then the phone hang-up and SIP
 *    transfer control tools when applicable. The hang-up tool is ordered last.
 * 6. Compiles the system prompt and renders `parameters` into it.
 *
 * @param config - Agent configuration as stored, before parameter rendering.
 * @param input - Per-session values; `channel` defaults to "web", `parameters`
 *   to `{}` and `clientTools` to an empty list.
 * @returns The prepared config with runtime, voice config, tool manifest and
 *   session metadata.
 */
export function prepareCloudVoiceSessionConfig(config: CloudVoiceAgentConfig, input: {
  agentName: string;
  sessionId: string;
  resourceId: Record<string, unknown>;
  channel?: CloudVoiceChannel;
  clientTools?: CloudVoiceClientToolManifest[];
  parameters?: Record<string, unknown>;
  parameterResolution?: CloudVoiceParameterResolutionResult;
}): PreparedCloudVoiceConfig {
  const parameters = input.parameters ?? {};
  const channel = input.channel ?? "web";
  const clientTools = input.clientTools ?? [];
  const entryMode = entryModeForChannel(channel);

  // Config with flow-start overrides applied and parameters substituted.
  const flowReadyConfig = applyFlowStartRuntimeConfig(config, { entryMode });
  const rendered = renderCloudVoiceParameterObject(flowReadyConfig, parameters);

  const renderedMetadata = getRecord(rendered.metadata);
  const compiledFlow = compileVoiceGraphToInstructions(renderedMetadata.flowGraph, {
    tools: rendered.tools,
    entryMode,
  });
  const flowInstructions = compiledFlow?.instructions;

  // The compiler marker is read from the stored config, not the rendered one.
  const marker = getRecord(getRecord(config.metadata).flowGraphInstructionCompiler);
  const flowReplacesAuthored = Boolean(marker.version) && Boolean(flowInstructions);
  const promptConfig = flowReplacesAuthored
    ? { ...rendered, instructions: flowInstructions ?? rendered.instructions }
    : rendered;
  const flowAlreadyEmbedded = Boolean(flowInstructions)
    && getString(promptConfig.instructions, "").includes(flowInstructions ?? "");
  // True when the stored marker matches the compiler version, graph signature
  // and entry mode of this compilation.
  const markerIsCurrent = (): boolean =>
    getString(marker.version, "") === CLOUD_VOICE_FLOW_INSTRUCTION_COMPILER_VERSION
    && getString(marker.graphSignature, "") === compiledFlow?.graphSignature
    && getString(marker.entryMode, "") === entryMode;
  const shouldAppendCompiledFlow = Boolean(compiledFlow)
    && !flowReplacesAuthored
    && !flowAlreadyEmbedded
    && !markerIsCurrent();

  const providerOptions = getRecord(rendered.providerOptions);
  const speech = normalizeSpeechConfig(rendered);
  const languageCode = resolveCloudVoiceSpeechLanguageCode(rendered);
  const turnDetection = normalizeCloudVoiceTurnDetection(rendered.provider, channel, rendered.turnDetection);
  const capabilities = getCloudVoiceProviderCapabilities(rendered.provider);
  const pipeline = rendered.provider === "kognitive-voice"
    ? normalizeCloudVoicePipelineConfig(rendered)
    : undefined;
  const cartesiaTtsOptions = rendered.provider === "kognitive-voice"
    ? compileCartesiaTtsOptions(rendered)
    : undefined;

  const finiteNumberOrUndefined = (candidate: unknown): number | undefined =>
    typeof candidate === "number" && Number.isFinite(candidate) ? candidate : undefined;

  // `maxOutputTokens` wins over `maxTokens`; with neither set there is no limit.
  const requestedTokenLimit = finiteNumberOrUndefined(providerOptions.maxOutputTokens)
    ?? finiteNumberOrUndefined(providerOptions.maxTokens);
  const maxOutputTokens = requestedTokenLimit === undefined
    ? "inf"
    : Math.max(1, Math.round(requestedTokenLimit));

  const requestedTemperature = finiteNumberOrUndefined(providerOptions.temperature);
  const temperature = requestedTemperature === undefined
    ? undefined
    : Math.max(0, Math.min(2, requestedTemperature));

  // Keyed by tool name so later registrations replace earlier ones in place.
  const toolManifestByName = new Map<string, CloudVoiceFunctionToolManifest>();
  const registerTool = (manifest: CloudVoiceFunctionToolManifest): void => {
    toolManifestByName.set(manifest.name, manifest);
  };

  for (const configTool of rendered.tools) {
    registerTool({
      type: "function",
      name: configTool.id,
      description: configTool.description ?? configTool.name,
      parameters: resolveToolInputSchema(configTool),
    });
  }

  for (const clientTool of clientTools) {
    registerTool({
      type: "function",
      name: clientTool.id,
      description: clientTool.description ?? clientTool.name ?? clientTool.id,
      parameters: clientTool.inputSchema ?? { type: "object", additionalProperties: true },
    });
  }

  if (shouldInjectPhoneControlTools(channel)) {
    registerTool(CLOUD_VOICE_PHONE_HANGUP_TOOL);
  }

  if (shouldInjectSipControlTools(config, channel)) {
    registerTool(CLOUD_VOICE_SIP_TRANSFER_TOOL);
  }

  // Push the hang-up tool to the end; every other tool keeps its insertion order.
  const hangupToolName = CLOUD_VOICE_PHONE_HANGUP_TOOL.name;
  const toolManifest = Array.from(toolManifestByName.values())
    .sort((a, b) => Number(a.name === hangupToolName) - Number(b.name === hangupToolName));

  const phoneControlRule = buildPhoneControlRule({ config: rendered, channel });
  const compiledInstructions = compileCloudVoiceInstructions({
    config: promptConfig,
    agentName: input.agentName,
    channel,
    toolCount: toolManifest.length,
    phoneControlRule,
    compiledFlowInstructions: shouldAppendCompiledFlow ? flowInstructions ?? "" : "",
  });
  const system = renderCloudVoiceParameterTemplate(compiledInstructions, parameters);

  // Pipeline settings, when present, take precedence over the top-level ones.
  const model = pipeline?.llm.model ?? rendered.model;
  const voice = pipeline?.tts.voice ?? rendered.voice;

  return {
    name: input.agentName,
    system,
    runtime: {
      provider: rendered.provider,
      mode: rendered.provider === "kognitive-voice" ? "pipeline" : "realtime",
      transport: pipeline?.transport.type ?? rendered.transport,
      model,
      voice,
      providerOptions: rendered.providerOptions,
      ...(pipeline ? { pipeline } : {}),
      capabilities,
    },
    voiceConfig: {
      system,
      model,
      voice,
      ...(speech ? { speech } : {}),
      ...(languageCode ? { languageCode } : {}),
      turnDetection,
      transcription: toPreparedTranscription(rendered),
      inputNoiseReduction: normalizeCloudVoiceInputNoiseReduction(rendered.provider, channel, rendered.inputNoiseReduction),
      maxOutputTokens,
      ...(temperature !== undefined ? { temperature } : {}),
      ...(cartesiaTtsOptions ? { tts: { provider: "cartesia" as const, options: cartesiaTtsOptions } } : {}),
    },
    tools: [],
    toolManifest,
    resourceId: input.resourceId,
    parameters,
    metadata: {
      cloudVoice: true,
      sessionId: input.sessionId,
      cloudVoiceConfigVersion: CLOUD_VOICE_CONFIG_VERSION,
      promptCompilerVersion: CLOUD_VOICE_PROMPT_COMPILER_VERSION,
      clientTools: clientTools.map((clientTool) => clientTool.id),
      transferDestinations: getTransferDestinations(rendered).map((destination) => destination.id),
      parameterKeys: Object.keys(parameters),
      missingRequiredParameters: input.parameterResolution?.missingRequired ?? [],
      sensitiveParameterKeys: input.parameterResolution?.sensitiveKeys ?? [],
      ...(compiledFlow
        ? {
          flowGraph: {
            startNodeId: compiledFlow.startNodeId,
            entryMode: compiledFlow.entryMode,
            nodeCount: compiledFlow.nodeCount,
            edgeCount: compiledFlow.edgeCount,
            referencedToolIds: compiledFlow.referencedToolIds,
            missingToolIds: compiledFlow.missingToolIds,
          },
        }
        : {}),
    },
  };
}
807
+
808
/**
 * Bundles an agent, its config and an already prepared session config into a
 * phone prepare snapshot (schema version 1).
 *
 * The `runtime`, `voiceConfig`, `toolManifest`, `parameters` and `config`
 * values are placed on the snapshot by reference; nothing is copied.
 *
 * @param input - `agent` supplies id/slug/name and the version, where
 *   `publishedVersion` is used unless it is null/undefined, in which case
 *   `draftVersion` is used. `now` overrides the creation time and defaults to
 *   the current time.
 * @returns The snapshot, with `createdAt` as an ISO-8601 string.
 */
export function createPhonePrepareSnapshot(input: {
  agent: CloudVoiceAgentSnapshot;
  channel: CloudVoiceChannel;
  config: CloudVoiceAgentConfig;
  prepare: PreparedCloudVoiceConfig;
  now?: Date;
}): CloudVoicePhonePrepareSnapshot {
  const { agent, channel, config, prepare } = input;
  const { runtime, voiceConfig, toolManifest, parameters } = prepare;
  const createdAt = (input.now ?? new Date()).toISOString();

  return {
    schemaVersion: 1,
    createdAt,
    channel,
    agent: {
      id: agent.id,
      slug: agent.slug,
      name: agent.name,
      version: agent.publishedVersion ?? agent.draftVersion,
    },
    runtime,
    voiceConfig,
    toolManifest,
    config,
    parameters,
  };
}