@kodelyth/voice-call 2026.5.42 → 2026.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (111)
  1. package/package.json +16 -4
  2. package/api.ts +0 -16
  3. package/cli-metadata.ts +0 -10
  4. package/config-api.ts +0 -12
  5. package/index.test.ts +0 -1075
  6. package/index.ts +0 -863
  7. package/runtime-api.ts +0 -20
  8. package/runtime-entry.ts +0 -1
  9. package/setup-api.ts +0 -47
  10. package/src/allowlist.test.ts +0 -18
  11. package/src/allowlist.ts +0 -19
  12. package/src/cli.test.ts +0 -12
  13. package/src/cli.ts +0 -866
  14. package/src/config-compat.test.ts +0 -130
  15. package/src/config-compat.ts +0 -227
  16. package/src/config.test.ts +0 -542
  17. package/src/config.ts +0 -883
  18. package/src/core-bridge.ts +0 -14
  19. package/src/deep-merge.test.ts +0 -40
  20. package/src/deep-merge.ts +0 -23
  21. package/src/gateway-continue-operation.ts +0 -200
  22. package/src/http-headers.test.ts +0 -16
  23. package/src/http-headers.ts +0 -15
  24. package/src/manager/context.ts +0 -50
  25. package/src/manager/events.test.ts +0 -578
  26. package/src/manager/events.ts +0 -332
  27. package/src/manager/lifecycle.ts +0 -53
  28. package/src/manager/lookup.test.ts +0 -52
  29. package/src/manager/lookup.ts +0 -35
  30. package/src/manager/outbound.test.ts +0 -629
  31. package/src/manager/outbound.ts +0 -508
  32. package/src/manager/state.ts +0 -48
  33. package/src/manager/store.ts +0 -107
  34. package/src/manager/timers.test.ts +0 -127
  35. package/src/manager/timers.ts +0 -113
  36. package/src/manager/twiml.test.ts +0 -13
  37. package/src/manager/twiml.ts +0 -17
  38. package/src/manager.closed-loop.test.ts +0 -259
  39. package/src/manager.inbound-allowlist.test.ts +0 -183
  40. package/src/manager.notify.test.ts +0 -390
  41. package/src/manager.restore.test.ts +0 -310
  42. package/src/manager.test-harness.ts +0 -127
  43. package/src/manager.ts +0 -441
  44. package/src/media-stream.test.ts +0 -953
  45. package/src/media-stream.ts +0 -876
  46. package/src/providers/base.ts +0 -99
  47. package/src/providers/mock.test.ts +0 -86
  48. package/src/providers/mock.ts +0 -185
  49. package/src/providers/plivo.test.ts +0 -93
  50. package/src/providers/plivo.ts +0 -601
  51. package/src/providers/shared/call-status.test.ts +0 -24
  52. package/src/providers/shared/call-status.ts +0 -24
  53. package/src/providers/shared/guarded-json-api.test.ts +0 -127
  54. package/src/providers/shared/guarded-json-api.ts +0 -49
  55. package/src/providers/telnyx.test.ts +0 -489
  56. package/src/providers/telnyx.ts +0 -419
  57. package/src/providers/twilio/api.test.ts +0 -184
  58. package/src/providers/twilio/api.ts +0 -100
  59. package/src/providers/twilio/twiml-policy.test.ts +0 -84
  60. package/src/providers/twilio/twiml-policy.ts +0 -87
  61. package/src/providers/twilio/webhook.ts +0 -34
  62. package/src/providers/twilio.test.ts +0 -607
  63. package/src/providers/twilio.ts +0 -861
  64. package/src/providers/twilio.types.ts +0 -17
  65. package/src/realtime-agent-context.test.ts +0 -101
  66. package/src/realtime-agent-context.ts +0 -149
  67. package/src/realtime-defaults.ts +0 -3
  68. package/src/realtime-fast-context.test.ts +0 -74
  69. package/src/realtime-fast-context.ts +0 -27
  70. package/src/realtime-transcription.runtime.ts +0 -4
  71. package/src/realtime-voice.runtime.ts +0 -5
  72. package/src/response-generator.test.ts +0 -385
  73. package/src/response-generator.ts +0 -348
  74. package/src/response-model.test.ts +0 -71
  75. package/src/response-model.ts +0 -23
  76. package/src/runtime.test.ts +0 -625
  77. package/src/runtime.ts +0 -528
  78. package/src/telephony-audio.test.ts +0 -61
  79. package/src/telephony-audio.ts +0 -12
  80. package/src/telephony-tts.test.ts +0 -196
  81. package/src/telephony-tts.ts +0 -235
  82. package/src/test-fixtures.ts +0 -82
  83. package/src/tts-provider-voice.test.ts +0 -34
  84. package/src/tts-provider-voice.ts +0 -21
  85. package/src/tunnel.test.ts +0 -173
  86. package/src/tunnel.ts +0 -314
  87. package/src/types.ts +0 -311
  88. package/src/utils.test.ts +0 -17
  89. package/src/utils.ts +0 -14
  90. package/src/voice-mapping.test.ts +0 -32
  91. package/src/voice-mapping.ts +0 -65
  92. package/src/webhook/realtime-audio-pacer.test.ts +0 -146
  93. package/src/webhook/realtime-audio-pacer.ts +0 -204
  94. package/src/webhook/realtime-handler.test.ts +0 -1450
  95. package/src/webhook/realtime-handler.ts +0 -1382
  96. package/src/webhook/stale-call-reaper.test.ts +0 -89
  97. package/src/webhook/stale-call-reaper.ts +0 -38
  98. package/src/webhook/stream-frame-adapter.test.ts +0 -187
  99. package/src/webhook/stream-frame-adapter.ts +0 -219
  100. package/src/webhook/tailscale.test.ts +0 -216
  101. package/src/webhook/tailscale.ts +0 -129
  102. package/src/webhook-exposure.test.ts +0 -33
  103. package/src/webhook-exposure.ts +0 -84
  104. package/src/webhook-security.test.ts +0 -813
  105. package/src/webhook-security.ts +0 -982
  106. package/src/webhook.hangup-once.lifecycle.test.ts +0 -179
  107. package/src/webhook.test.ts +0 -1615
  108. package/src/webhook.ts +0 -933
  109. package/src/webhook.types.ts +0 -5
  110. package/src/websocket-test-support.ts +0 -72
  111. package/tsconfig.json +0 -16
@@ -1,348 +0,0 @@
1
- /**
2
- * Voice call response generator - uses the embedded Pi agent for tool support.
3
- * Routes voice responses through the same agent infrastructure as messaging.
4
- */
5
-
6
- import crypto from "node:crypto";
7
- import { applyModelOverrideToSessionEntry } from "klaw/plugin-sdk/model-session-runtime";
8
- import { normalizeLowercaseStringOrEmpty } from "klaw/plugin-sdk/string-coerce-runtime";
9
- import type { SessionEntry } from "../api.js";
10
- import { resolveVoiceCallSessionKey, type VoiceCallConfig } from "./config.js";
11
- import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
12
- import { resolveVoiceResponseModel } from "./response-model.js";
13
-
14
- export type VoiceResponseParams = {
15
- /** Voice call config */
16
- voiceConfig: VoiceCallConfig;
17
- /** Core Klaw config */
18
- coreConfig: CoreConfig;
19
- /** Injected host agent runtime */
20
- agentRuntime: CoreAgentDeps;
21
- /** Call ID for session tracking */
22
- callId: string;
23
- /** Persisted call session key */
24
- sessionKey?: string;
25
- /** Caller's phone number */
26
- from: string;
27
- /** Conversation transcript */
28
- transcript: Array<{ speaker: "user" | "bot"; text: string }>;
29
- /** Latest user message */
30
- userMessage: string;
31
- };
32
-
33
- export type VoiceResponseResult = {
34
- text: string | null;
35
- error?: string;
36
- };
37
-
38
- type VoiceResponsePayload = {
39
- text?: string;
40
- isError?: boolean;
41
- isReasoning?: boolean;
42
- };
43
-
44
- function isRecord(value: unknown): value is Record<string, unknown> {
45
- return typeof value === "object" && value !== null && !Array.isArray(value);
46
- }
47
-
48
- function readExplicitToolsAllow(value: unknown): string[] | undefined {
49
- if (!isRecord(value)) {
50
- return undefined;
51
- }
52
-
53
- const allow = value.allow;
54
- if (!Array.isArray(allow)) {
55
- return undefined;
56
- }
57
-
58
- return allow.filter((entry): entry is string => typeof entry === "string");
59
- }
60
-
61
- function resolveVoiceAgentToolsAllow(config: CoreConfig, agentId: string): string[] | undefined {
62
- const agents = isRecord(config.agents) ? config.agents : undefined;
63
- const list = Array.isArray(agents?.list) ? agents.list : [];
64
- const agent = list.find((entry) => isRecord(entry) && entry.id === agentId);
65
- if (!isRecord(agent)) {
66
- return undefined;
67
- }
68
-
69
- return readExplicitToolsAllow(isRecord(agent.tools) ? agent.tools : undefined);
70
- }
71
-
72
- const VOICE_SPOKEN_OUTPUT_CONTRACT = [
73
- "Output format requirements:",
74
- '- Return only valid JSON in this exact shape: {"spoken":"..."}',
75
- "- Do not include markdown, code fences, planning text, or extra keys.",
76
- '- Put exactly what should be spoken to the caller into "spoken".',
77
- '- If there is nothing to say, return {"spoken":""}.',
78
- ].join("\n");
79
-
80
- function normalizeSpokenText(value: string): string | null {
81
- const normalized = value.replace(/\s+/g, " ").trim();
82
- return normalized.length > 0 ? normalized : null;
83
- }
84
-
85
- function tryParseSpokenJson(text: string): string | null {
86
- const candidates: string[] = [];
87
- const trimmed = text.trim();
88
- if (!trimmed) {
89
- return null;
90
- }
91
- candidates.push(trimmed);
92
-
93
- const fenced = trimmed.match(/^```(?:json)?\s*([\s\S]*?)\s*```$/i);
94
- if (fenced?.[1]) {
95
- candidates.push(fenced[1]);
96
- }
97
-
98
- const firstBrace = trimmed.indexOf("{");
99
- const lastBrace = trimmed.lastIndexOf("}");
100
- if (firstBrace >= 0 && lastBrace > firstBrace) {
101
- candidates.push(trimmed.slice(firstBrace, lastBrace + 1));
102
- }
103
-
104
- for (const candidate of candidates) {
105
- try {
106
- const parsed = JSON.parse(candidate) as { spoken?: unknown };
107
- if (typeof parsed?.spoken !== "string") {
108
- continue;
109
- }
110
- return normalizeSpokenText(parsed.spoken) ?? "";
111
- } catch {
112
- // Continue trying other candidates.
113
- }
114
- }
115
-
116
- const inlineSpokenMatch = trimmed.match(/"spoken"\s*:\s*"((?:[^"\\]|\\.)*)"/i);
117
- if (!inlineSpokenMatch) {
118
- return null;
119
- }
120
-
121
- try {
122
- const decoded = JSON.parse(`"${inlineSpokenMatch[1] ?? ""}"`) as string;
123
- return normalizeSpokenText(decoded) ?? "";
124
- } catch {
125
- return null;
126
- }
127
- }
128
-
129
- function isLikelyMetaReasoningParagraph(paragraph: string): boolean {
130
- const lower = normalizeLowercaseStringOrEmpty(paragraph);
131
- if (!lower) {
132
- return false;
133
- }
134
-
135
- if (lower.startsWith("thinking process")) {
136
- return true;
137
- }
138
- if (lower.startsWith("reasoning:") || lower.startsWith("analysis:")) {
139
- return true;
140
- }
141
- if (
142
- lower.startsWith("the user ") &&
143
- (lower.includes("i should") || lower.includes("i need to") || lower.includes("i will"))
144
- ) {
145
- return true;
146
- }
147
- if (
148
- lower.includes("this is a natural continuation of the conversation") ||
149
- lower.includes("keep the conversation flowing")
150
- ) {
151
- return true;
152
- }
153
-
154
- return false;
155
- }
156
-
157
- function sanitizePlainSpokenText(text: string): string | null {
158
- const withoutCodeFences = text.replace(/```[\s\S]*?```/g, " ").trim();
159
- if (!withoutCodeFences) {
160
- return null;
161
- }
162
-
163
- const paragraphs = withoutCodeFences
164
- .split(/\n\s*\n+/)
165
- .map((paragraph) => paragraph.trim())
166
- .filter(Boolean);
167
-
168
- while (paragraphs.length > 1 && isLikelyMetaReasoningParagraph(paragraphs[0])) {
169
- paragraphs.shift();
170
- }
171
-
172
- return normalizeSpokenText(paragraphs.join(" "));
173
- }
174
-
175
- function extractSpokenTextFromPayloads(payloads: VoiceResponsePayload[]): string | null {
176
- const spokenSegments: string[] = [];
177
-
178
- for (const payload of payloads) {
179
- if (payload.isError || payload.isReasoning) {
180
- continue;
181
- }
182
-
183
- const rawText = payload.text?.trim() ?? "";
184
- if (!rawText) {
185
- continue;
186
- }
187
-
188
- const structured = tryParseSpokenJson(rawText);
189
- if (structured !== null) {
190
- if (structured.length > 0) {
191
- spokenSegments.push(structured);
192
- }
193
- continue;
194
- }
195
-
196
- const plain = sanitizePlainSpokenText(rawText);
197
- if (plain) {
198
- spokenSegments.push(plain);
199
- }
200
- }
201
-
202
- return spokenSegments.length > 0 ? spokenSegments.join(" ").trim() : null;
203
- }
204
-
205
- function resolveVoiceSandboxSessionKey(agentId: string, sessionKey: string): string {
206
- const trimmed = sessionKey.trim();
207
- if (trimmed.toLowerCase().startsWith("agent:")) {
208
- return trimmed;
209
- }
210
- return `agent:${agentId}:${trimmed}`;
211
- }
212
-
213
- /**
214
- * Generate a voice response using the embedded Pi agent with full tool support.
215
- * Uses the same agent infrastructure as messaging for consistent behavior.
216
- */
217
- export async function generateVoiceResponse(
218
- params: VoiceResponseParams,
219
- ): Promise<VoiceResponseResult> {
220
- const {
221
- voiceConfig,
222
- callId,
223
- sessionKey,
224
- from,
225
- transcript,
226
- userMessage,
227
- coreConfig,
228
- agentRuntime,
229
- } = params;
230
-
231
- if (!coreConfig) {
232
- return { text: null, error: "Core config unavailable for voice response" };
233
- }
234
- const cfg = coreConfig;
235
-
236
- const resolvedSessionKey = resolveVoiceCallSessionKey({
237
- config: voiceConfig,
238
- callId,
239
- phone: from,
240
- explicitSessionKey: sessionKey,
241
- });
242
- const agentId = voiceConfig.agentId ?? "main";
243
- const toolsAllow = resolveVoiceAgentToolsAllow(cfg, agentId);
244
-
245
- // Resolve paths
246
- const storePath = agentRuntime.session.resolveStorePath(cfg.session?.store, { agentId });
247
- const agentDir = agentRuntime.resolveAgentDir(cfg, agentId);
248
- const workspaceDir = agentRuntime.resolveAgentWorkspaceDir(cfg, agentId);
249
-
250
- // Ensure workspace exists
251
- await agentRuntime.ensureAgentWorkspace({ dir: workspaceDir });
252
-
253
- // Load or create session entry
254
- const sessionStore = agentRuntime.session.loadSessionStore(storePath);
255
- const now = Date.now();
256
- const existingSessionEntry = sessionStore[resolvedSessionKey] as SessionEntry | undefined;
257
-
258
- // Resolve model from config
259
- const { provider, model } = resolveVoiceResponseModel({ voiceConfig, agentRuntime });
260
-
261
- let sessionEntry = existingSessionEntry;
262
- if (!sessionEntry?.sessionId || voiceConfig.responseModel) {
263
- sessionEntry = await agentRuntime.session.updateSessionStore(storePath, (store) => {
264
- let entry = store[resolvedSessionKey] as SessionEntry | undefined;
265
- if (!entry?.sessionId) {
266
- entry = {
267
- ...entry,
268
- sessionId: crypto.randomUUID(),
269
- updatedAt: now,
270
- };
271
- store[resolvedSessionKey] = entry;
272
- }
273
- if (voiceConfig.responseModel) {
274
- applyModelOverrideToSessionEntry({
275
- entry,
276
- selection: { provider, model },
277
- selectionSource: "auto",
278
- });
279
- }
280
- return entry;
281
- });
282
- }
283
- const sessionId = sessionEntry.sessionId;
284
-
285
- const sessionFile = agentRuntime.session.resolveSessionFilePath(sessionId, sessionEntry, {
286
- agentId,
287
- });
288
-
289
- // Resolve thinking level
290
- const thinkLevel = agentRuntime.resolveThinkingDefault({ cfg, provider, model });
291
-
292
- // Resolve agent identity for personalized prompt
293
- const identity = agentRuntime.resolveAgentIdentity(cfg, agentId);
294
- const agentName = identity?.name?.trim() || "assistant";
295
-
296
- // Build system prompt with conversation history
297
- const basePrompt =
298
- voiceConfig.responseSystemPrompt ??
299
- `You are ${agentName}, a helpful voice assistant on a phone call. Keep responses brief and conversational (1-2 sentences max). Be natural and friendly. The caller's phone number is ${from}. You have access to tools - use them when helpful.`;
300
-
301
- let extraSystemPrompt = basePrompt;
302
- if (transcript.length > 0) {
303
- const history = transcript
304
- .map((entry) => `${entry.speaker === "bot" ? "You" : "Caller"}: ${entry.text}`)
305
- .join("\n");
306
- extraSystemPrompt = `${basePrompt}\n\nConversation so far:\n${history}`;
307
- }
308
- extraSystemPrompt = `${extraSystemPrompt}\n\n${VOICE_SPOKEN_OUTPUT_CONTRACT}`;
309
-
310
- // Resolve timeout
311
- const timeoutMs = voiceConfig.responseTimeoutMs ?? agentRuntime.resolveAgentTimeoutMs({ cfg });
312
- const runId = `voice:${callId}:${Date.now()}`;
313
-
314
- try {
315
- const result = await agentRuntime.runEmbeddedPiAgent({
316
- sessionId,
317
- sessionKey: resolvedSessionKey,
318
- sandboxSessionKey: resolveVoiceSandboxSessionKey(agentId, resolvedSessionKey),
319
- agentId,
320
- messageProvider: "voice",
321
- sessionFile,
322
- workspaceDir,
323
- config: cfg,
324
- prompt: userMessage,
325
- provider,
326
- model,
327
- thinkLevel,
328
- verboseLevel: "off",
329
- timeoutMs,
330
- runId,
331
- lane: "voice",
332
- extraSystemPrompt,
333
- agentDir,
334
- toolsAllow,
335
- });
336
-
337
- const text = extractSpokenTextFromPayloads((result.payloads ?? []) as VoiceResponsePayload[]);
338
-
339
- if (!text && result.meta?.aborted) {
340
- return { text: null, error: "Response generation was aborted" };
341
- }
342
-
343
- return { text };
344
- } catch (err) {
345
- console.error(`[voice-call] Response generation failed:`, err);
346
- return { text: null, error: String(err) };
347
- }
348
- }
@@ -1,71 +0,0 @@
1
- import { describe, expect, it } from "vitest";
2
- import { VoiceCallConfigSchema } from "./config.js";
3
- import type { CoreAgentDeps } from "./core-bridge.js";
4
- import { resolveVoiceResponseModel } from "./response-model.js";
5
-
6
- const agentRuntime = {
7
- defaults: {
8
- provider: "together",
9
- model: "Qwen/Qwen2.5-7B-Instruct-Turbo",
10
- },
11
- } as unknown as CoreAgentDeps;
12
-
13
- describe("resolveVoiceResponseModel", () => {
14
- it("falls back to the runtime default model", () => {
15
- expect(
16
- resolveVoiceResponseModel({
17
- voiceConfig: VoiceCallConfigSchema.parse({}),
18
- agentRuntime,
19
- }),
20
- ).toEqual({
21
- modelRef: "together/Qwen/Qwen2.5-7B-Instruct-Turbo",
22
- provider: "together",
23
- model: "Qwen/Qwen2.5-7B-Instruct-Turbo",
24
- });
25
- });
26
-
27
- it("uses an explicit provider/model ref", () => {
28
- expect(
29
- resolveVoiceResponseModel({
30
- voiceConfig: VoiceCallConfigSchema.parse({
31
- responseModel: "openai/gpt-5.4-mini",
32
- }),
33
- agentRuntime,
34
- }),
35
- ).toEqual({
36
- modelRef: "openai/gpt-5.4-mini",
37
- provider: "openai",
38
- model: "gpt-5.4-mini",
39
- });
40
- });
41
-
42
- it("uses the runtime default provider for bare model overrides", () => {
43
- expect(
44
- resolveVoiceResponseModel({
45
- voiceConfig: VoiceCallConfigSchema.parse({
46
- responseModel: "meta-llama/Llama-4-Scout-17B-16E-Instruct",
47
- }),
48
- agentRuntime,
49
- }),
50
- ).toEqual({
51
- modelRef: "meta-llama/Llama-4-Scout-17B-16E-Instruct",
52
- provider: "meta-llama",
53
- model: "Llama-4-Scout-17B-16E-Instruct",
54
- });
55
- });
56
-
57
- it("keeps legacy single-segment overrides on the runtime default provider", () => {
58
- expect(
59
- resolveVoiceResponseModel({
60
- voiceConfig: VoiceCallConfigSchema.parse({
61
- responseModel: "gpt-5.4-mini",
62
- }),
63
- agentRuntime,
64
- }),
65
- ).toEqual({
66
- modelRef: "gpt-5.4-mini",
67
- provider: "together",
68
- model: "gpt-5.4-mini",
69
- });
70
- });
71
- });
@@ -1,23 +0,0 @@
1
- import type { VoiceCallConfig } from "./config.js";
2
- import type { CoreAgentDeps } from "./core-bridge.js";
3
-
4
- export function resolveVoiceResponseModel(params: {
5
- voiceConfig: VoiceCallConfig;
6
- agentRuntime: CoreAgentDeps;
7
- }): {
8
- modelRef: string;
9
- provider: string;
10
- model: string;
11
- } {
12
- const modelRef =
13
- params.voiceConfig.responseModel ??
14
- `${params.agentRuntime.defaults.provider}/${params.agentRuntime.defaults.model}`;
15
- const slashIndex = modelRef.indexOf("/");
16
-
17
- return {
18
- modelRef,
19
- provider:
20
- slashIndex === -1 ? params.agentRuntime.defaults.provider : modelRef.slice(0, slashIndex),
21
- model: slashIndex === -1 ? modelRef : modelRef.slice(slashIndex + 1),
22
- };
23
- }