@openclaw/voice-call 2026.3.13 → 2026.5.2-beta.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -5
- package/api.ts +16 -0
- package/cli-metadata.ts +10 -0
- package/config-api.ts +12 -0
- package/index.test.ts +943 -0
- package/index.ts +379 -149
- package/openclaw.plugin.json +384 -157
- package/package.json +35 -5
- package/runtime-api.ts +20 -0
- package/runtime-entry.ts +1 -0
- package/setup-api.ts +47 -0
- package/src/allowlist.test.ts +18 -0
- package/src/cli.ts +533 -68
- package/src/config-compat.test.ts +120 -0
- package/src/config-compat.ts +227 -0
- package/src/config.test.ts +273 -12
- package/src/config.ts +355 -72
- package/src/core-bridge.ts +2 -147
- package/src/deep-merge.test.ts +40 -0
- package/src/gateway-continue-operation.ts +200 -0
- package/src/http-headers.ts +6 -3
- package/src/manager/context.ts +6 -5
- package/src/manager/events.test.ts +243 -19
- package/src/manager/events.ts +61 -31
- package/src/manager/lifecycle.ts +53 -0
- package/src/manager/lookup.test.ts +52 -0
- package/src/manager/outbound.test.ts +528 -0
- package/src/manager/outbound.ts +163 -57
- package/src/manager/store.ts +18 -6
- package/src/manager/timers.test.ts +129 -0
- package/src/manager/timers.ts +4 -3
- package/src/manager/twiml.test.ts +13 -0
- package/src/manager/twiml.ts +8 -0
- package/src/manager.closed-loop.test.ts +30 -12
- package/src/manager.inbound-allowlist.test.ts +77 -10
- package/src/manager.notify.test.ts +344 -20
- package/src/manager.restore.test.ts +95 -8
- package/src/manager.test-harness.ts +8 -6
- package/src/manager.ts +79 -5
- package/src/media-stream.test.ts +578 -81
- package/src/media-stream.ts +235 -54
- package/src/providers/base.ts +19 -0
- package/src/providers/mock.ts +7 -1
- package/src/providers/plivo.test.ts +50 -6
- package/src/providers/plivo.ts +14 -6
- package/src/providers/shared/call-status.ts +2 -1
- package/src/providers/shared/guarded-json-api.test.ts +106 -0
- package/src/providers/shared/guarded-json-api.ts +1 -1
- package/src/providers/telnyx.test.ts +178 -6
- package/src/providers/telnyx.ts +40 -3
- package/src/providers/twilio/api.test.ts +145 -0
- package/src/providers/twilio/api.ts +67 -16
- package/src/providers/twilio/twiml-policy.ts +6 -10
- package/src/providers/twilio/webhook.ts +1 -1
- package/src/providers/twilio.test.ts +425 -25
- package/src/providers/twilio.ts +230 -77
- package/src/providers/twilio.types.ts +17 -0
- package/src/realtime-defaults.ts +3 -0
- package/src/realtime-fast-context.test.ts +88 -0
- package/src/realtime-fast-context.ts +165 -0
- package/src/realtime-transcription.runtime.ts +4 -0
- package/src/realtime-voice.runtime.ts +5 -0
- package/src/response-generator.test.ts +321 -0
- package/src/response-generator.ts +213 -53
- package/src/response-model.test.ts +71 -0
- package/src/response-model.ts +23 -0
- package/src/runtime.test.ts +429 -0
- package/src/runtime.ts +270 -24
- package/src/telephony-audio.test.ts +61 -0
- package/src/telephony-audio.ts +1 -79
- package/src/telephony-tts.test.ts +133 -12
- package/src/telephony-tts.ts +155 -2
- package/src/test-fixtures.ts +28 -7
- package/src/tts-provider-voice.test.ts +34 -0
- package/src/tts-provider-voice.ts +21 -0
- package/src/tunnel.test.ts +166 -0
- package/src/tunnel.ts +1 -1
- package/src/types.ts +24 -37
- package/src/utils.test.ts +17 -0
- package/src/voice-mapping.test.ts +34 -0
- package/src/voice-mapping.ts +3 -2
- package/src/webhook/realtime-handler.test.ts +598 -0
- package/src/webhook/realtime-handler.ts +485 -0
- package/src/webhook/stale-call-reaper.test.ts +88 -0
- package/src/webhook/stale-call-reaper.ts +5 -0
- package/src/webhook/tailscale.test.ts +214 -0
- package/src/webhook/tailscale.ts +19 -5
- package/src/webhook-exposure.test.ts +33 -0
- package/src/webhook-exposure.ts +84 -0
- package/src/webhook-security.test.ts +172 -21
- package/src/webhook-security.ts +43 -29
- package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
- package/src/webhook.test.ts +1145 -27
- package/src/webhook.ts +523 -102
- package/src/webhook.types.ts +5 -0
- package/src/websocket-test-support.ts +72 -0
- package/tsconfig.json +16 -0
- package/CHANGELOG.md +0 -121
- package/src/providers/index.ts +0 -10
- package/src/providers/stt-openai-realtime.test.ts +0 -42
- package/src/providers/stt-openai-realtime.ts +0 -311
- package/src/providers/tts-openai.test.ts +0 -43
- package/src/providers/tts-openai.ts +0 -221
package/src/manager/outbound.ts
CHANGED
|
@@ -1,6 +1,13 @@
|
|
|
1
1
|
import crypto from "node:crypto";
|
|
2
|
-
import
|
|
2
|
+
import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
|
|
3
3
|
import {
|
|
4
|
+
resolveVoiceCallEffectiveConfig,
|
|
5
|
+
resolveVoiceCallSessionKey,
|
|
6
|
+
type CallMode,
|
|
7
|
+
} from "../config.js";
|
|
8
|
+
import { resolvePreferredTtsVoice } from "../tts-provider-voice.js";
|
|
9
|
+
import {
|
|
10
|
+
type EndReason,
|
|
4
11
|
TerminalStates,
|
|
5
12
|
type CallId,
|
|
6
13
|
type CallRecord,
|
|
@@ -8,16 +15,12 @@ import {
|
|
|
8
15
|
} from "../types.js";
|
|
9
16
|
import { mapVoiceToPolly } from "../voice-mapping.js";
|
|
10
17
|
import type { CallManagerContext } from "./context.js";
|
|
18
|
+
import { finalizeCall } from "./lifecycle.js";
|
|
11
19
|
import { getCallByProviderCallId } from "./lookup.js";
|
|
12
20
|
import { addTranscriptEntry, transitionState } from "./state.js";
|
|
13
21
|
import { persistCallRecord } from "./store.js";
|
|
14
|
-
import {
|
|
15
|
-
|
|
16
|
-
clearTranscriptWaiter,
|
|
17
|
-
rejectTranscriptWaiter,
|
|
18
|
-
waitForFinalTranscript,
|
|
19
|
-
} from "./timers.js";
|
|
20
|
-
import { generateNotifyTwiml } from "./twiml.js";
|
|
22
|
+
import { clearTranscriptWaiter, waitForFinalTranscript } from "./timers.js";
|
|
23
|
+
import { generateDtmfRedirectTwiml, generateNotifyTwiml } from "./twiml.js";
|
|
21
24
|
|
|
22
25
|
type InitiateContext = Pick<
|
|
23
26
|
CallManagerContext,
|
|
@@ -39,6 +42,7 @@ type ConversationContext = Pick<
|
|
|
39
42
|
| "activeTurnCalls"
|
|
40
43
|
| "transcriptWaiters"
|
|
41
44
|
| "maxDurationTimers"
|
|
45
|
+
| "initialMessageInFlight"
|
|
42
46
|
>;
|
|
43
47
|
|
|
44
48
|
type EndCallContext = Pick<
|
|
@@ -102,6 +106,12 @@ function requireConnectedCall(ctx: ConnectedCallContext, callId: CallId): Connec
|
|
|
102
106
|
};
|
|
103
107
|
}
|
|
104
108
|
|
|
109
|
+
function validateDtmfDigits(digits: string): string | null {
|
|
110
|
+
return /^[0-9*#wWpP,]+$/.test(digits)
|
|
111
|
+
? null
|
|
112
|
+
: "digits may only contain digits, *, #, comma, w, p";
|
|
113
|
+
}
|
|
114
|
+
|
|
105
115
|
export async function initiateCall(
|
|
106
116
|
ctx: InitiateContext,
|
|
107
117
|
to: string,
|
|
@@ -112,6 +122,20 @@ export async function initiateCall(
|
|
|
112
122
|
typeof options === "string" ? { message: options } : (options ?? {});
|
|
113
123
|
const initialMessage = opts.message;
|
|
114
124
|
const mode = opts.mode ?? ctx.config.outbound.defaultMode;
|
|
125
|
+
const dtmfSequence = opts.dtmfSequence;
|
|
126
|
+
if (dtmfSequence) {
|
|
127
|
+
const validationError = validateDtmfDigits(dtmfSequence);
|
|
128
|
+
if (validationError) {
|
|
129
|
+
return { callId: "", success: false, error: validationError };
|
|
130
|
+
}
|
|
131
|
+
if (mode !== "conversation") {
|
|
132
|
+
return {
|
|
133
|
+
callId: "",
|
|
134
|
+
success: false,
|
|
135
|
+
error: "dtmfSequence requires conversation mode",
|
|
136
|
+
};
|
|
137
|
+
}
|
|
138
|
+
}
|
|
115
139
|
|
|
116
140
|
if (!ctx.provider) {
|
|
117
141
|
return { callId: "", success: false, error: "Provider not initialized" };
|
|
@@ -142,7 +166,12 @@ export async function initiateCall(
|
|
|
142
166
|
state: "initiated",
|
|
143
167
|
from,
|
|
144
168
|
to,
|
|
145
|
-
sessionKey
|
|
169
|
+
sessionKey: resolveVoiceCallSessionKey({
|
|
170
|
+
config: ctx.config,
|
|
171
|
+
callId,
|
|
172
|
+
phone: to,
|
|
173
|
+
explicitSessionKey: sessionKey,
|
|
174
|
+
}),
|
|
146
175
|
startedAt: Date.now(),
|
|
147
176
|
transcript: [],
|
|
148
177
|
processedEventIds: [],
|
|
@@ -158,10 +187,16 @@ export async function initiateCall(
|
|
|
158
187
|
try {
|
|
159
188
|
// For notify mode with a message, use inline TwiML with <Say>.
|
|
160
189
|
let inlineTwiml: string | undefined;
|
|
190
|
+
let preConnectTwiml: string | undefined;
|
|
161
191
|
if (mode === "notify" && initialMessage) {
|
|
162
|
-
const pollyVoice = mapVoiceToPolly(ctx.config
|
|
192
|
+
const pollyVoice = mapVoiceToPolly(resolvePreferredTtsVoice(ctx.config));
|
|
163
193
|
inlineTwiml = generateNotifyTwiml(initialMessage, pollyVoice);
|
|
164
194
|
console.log(`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`);
|
|
195
|
+
} else if (dtmfSequence) {
|
|
196
|
+
preConnectTwiml = generateDtmfRedirectTwiml(dtmfSequence, ctx.webhookUrl);
|
|
197
|
+
console.log(
|
|
198
|
+
`[voice-call] Using pre-connect DTMF TwiML for call ${callId} (digits=${dtmfSequence.length}, initialMessage=${initialMessage ? "yes" : "no"})`,
|
|
199
|
+
);
|
|
165
200
|
}
|
|
166
201
|
|
|
167
202
|
const result = await ctx.provider.initiateCall({
|
|
@@ -170,27 +205,28 @@ export async function initiateCall(
|
|
|
170
205
|
to,
|
|
171
206
|
webhookUrl: ctx.webhookUrl,
|
|
172
207
|
inlineTwiml,
|
|
208
|
+
preConnectTwiml,
|
|
173
209
|
});
|
|
174
210
|
|
|
175
211
|
callRecord.providerCallId = result.providerCallId;
|
|
176
212
|
ctx.providerCallIdMap.set(result.providerCallId, callId);
|
|
177
213
|
persistCallRecord(ctx.storePath, callRecord);
|
|
214
|
+
console.log(
|
|
215
|
+
`[voice-call] Outbound call initiated: callId=${callId} providerCallId=${result.providerCallId} mode=${mode} preConnectDtmf=${preConnectTwiml ? "yes" : "no"} initialMessage=${initialMessage ? "yes" : "no"}`,
|
|
216
|
+
);
|
|
178
217
|
|
|
179
218
|
return { callId, success: true };
|
|
180
219
|
} catch (err) {
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
if (callRecord.providerCallId) {
|
|
187
|
-
ctx.providerCallIdMap.delete(callRecord.providerCallId);
|
|
188
|
-
}
|
|
220
|
+
finalizeCall({
|
|
221
|
+
ctx,
|
|
222
|
+
call: callRecord,
|
|
223
|
+
endReason: "failed",
|
|
224
|
+
});
|
|
189
225
|
|
|
190
226
|
return {
|
|
191
227
|
callId,
|
|
192
228
|
success: false,
|
|
193
|
-
error:
|
|
229
|
+
error: formatErrorMessage(err),
|
|
194
230
|
};
|
|
195
231
|
}
|
|
196
232
|
}
|
|
@@ -210,9 +246,11 @@ export async function speak(
|
|
|
210
246
|
transitionState(call, "speaking");
|
|
211
247
|
persistCallRecord(ctx.storePath, call);
|
|
212
248
|
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
const voice =
|
|
249
|
+
const numberRouteKey =
|
|
250
|
+
typeof call.metadata?.numberRouteKey === "string" ? call.metadata.numberRouteKey : call.to;
|
|
251
|
+
const voice = resolvePreferredTtsVoice(
|
|
252
|
+
resolveVoiceCallEffectiveConfig(ctx.config, numberRouteKey).config,
|
|
253
|
+
);
|
|
216
254
|
await provider.playTts({
|
|
217
255
|
callId,
|
|
218
256
|
providerCallId,
|
|
@@ -220,9 +258,57 @@ export async function speak(
|
|
|
220
258
|
voice,
|
|
221
259
|
});
|
|
222
260
|
|
|
261
|
+
addTranscriptEntry(call, "bot", text);
|
|
262
|
+
persistCallRecord(ctx.storePath, call);
|
|
263
|
+
|
|
264
|
+
return { success: true };
|
|
265
|
+
} catch (err) {
|
|
266
|
+
// A failed playback should not leave the call stuck in speaking state.
|
|
267
|
+
transitionState(call, "listening");
|
|
268
|
+
persistCallRecord(ctx.storePath, call);
|
|
269
|
+
return { success: false, error: formatErrorMessage(err) };
|
|
270
|
+
}
|
|
271
|
+
}
|
|
272
|
+
|
|
273
|
+
function shouldStartListeningAfterInitialMessage(ctx: ConversationContext): boolean {
|
|
274
|
+
if (ctx.provider?.name !== "twilio") {
|
|
275
|
+
return true;
|
|
276
|
+
}
|
|
277
|
+
if (!ctx.config.streaming.enabled) {
|
|
278
|
+
return true;
|
|
279
|
+
}
|
|
280
|
+
const streamAwareProvider = ctx.provider as typeof ctx.provider & {
|
|
281
|
+
isConversationStreamConnectEnabled?: () => boolean;
|
|
282
|
+
};
|
|
283
|
+
return streamAwareProvider.isConversationStreamConnectEnabled?.() !== true;
|
|
284
|
+
}
|
|
285
|
+
|
|
286
|
+
export async function sendDtmf(
|
|
287
|
+
ctx: SpeakContext,
|
|
288
|
+
callId: CallId,
|
|
289
|
+
digits: string,
|
|
290
|
+
): Promise<{ success: boolean; error?: string }> {
|
|
291
|
+
const validationError = validateDtmfDigits(digits);
|
|
292
|
+
if (validationError) {
|
|
293
|
+
return { success: false, error: validationError };
|
|
294
|
+
}
|
|
295
|
+
const connected = requireConnectedCall(ctx, callId);
|
|
296
|
+
if (!connected.ok) {
|
|
297
|
+
return { success: false, error: connected.error };
|
|
298
|
+
}
|
|
299
|
+
if (!connected.provider.sendDtmf) {
|
|
300
|
+
return { success: false, error: `${connected.provider.name} does not support outbound DTMF` };
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
try {
|
|
304
|
+
await connected.provider.sendDtmf({
|
|
305
|
+
callId,
|
|
306
|
+
providerCallId: connected.providerCallId,
|
|
307
|
+
digits,
|
|
308
|
+
});
|
|
223
309
|
return { success: true };
|
|
224
310
|
} catch (err) {
|
|
225
|
-
return { success: false, error:
|
|
311
|
+
return { success: false, error: formatErrorMessage(err) };
|
|
226
312
|
}
|
|
227
313
|
}
|
|
228
314
|
|
|
@@ -248,29 +334,52 @@ export async function speakInitialMessage(
|
|
|
248
334
|
return;
|
|
249
335
|
}
|
|
250
336
|
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
}
|
|
256
|
-
|
|
257
|
-
console.log(`[voice-call] Speaking initial message for call ${call.callId} (mode: ${mode})`);
|
|
258
|
-
const result = await speak(ctx, call.callId, initialMessage);
|
|
259
|
-
if (!result.success) {
|
|
260
|
-
console.warn(`[voice-call] Failed to speak initial message: ${result.error}`);
|
|
337
|
+
if (ctx.initialMessageInFlight.has(call.callId)) {
|
|
338
|
+
console.log(
|
|
339
|
+
`[voice-call] speakInitialMessage: initial message already in flight for ${call.callId}`,
|
|
340
|
+
);
|
|
261
341
|
return;
|
|
262
342
|
}
|
|
343
|
+
ctx.initialMessageInFlight.add(call.callId);
|
|
263
344
|
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
345
|
+
try {
|
|
346
|
+
console.log(`[voice-call] Speaking initial message for call ${call.callId} (mode: ${mode})`);
|
|
347
|
+
const result = await speak(ctx, call.callId, initialMessage);
|
|
348
|
+
if (!result.success) {
|
|
349
|
+
console.warn(`[voice-call] Failed to speak initial message: ${result.error}`);
|
|
350
|
+
return;
|
|
351
|
+
}
|
|
352
|
+
|
|
353
|
+
// Clear only after successful playback so transient provider failures can retry.
|
|
354
|
+
if (call.metadata) {
|
|
355
|
+
delete call.metadata.initialMessage;
|
|
356
|
+
persistCallRecord(ctx.storePath, call);
|
|
357
|
+
}
|
|
358
|
+
|
|
359
|
+
if (mode === "notify") {
|
|
360
|
+
const delaySec = ctx.config.outbound.notifyHangupDelaySec;
|
|
361
|
+
console.log(`[voice-call] Notify mode: auto-hangup in ${delaySec}s for call ${call.callId}`);
|
|
362
|
+
setTimeout(async () => {
|
|
363
|
+
const currentCall = ctx.activeCalls.get(call.callId);
|
|
364
|
+
if (currentCall && !TerminalStates.has(currentCall.state)) {
|
|
365
|
+
console.log(`[voice-call] Notify mode: hanging up call ${call.callId}`);
|
|
366
|
+
await endCall(ctx, call.callId);
|
|
367
|
+
}
|
|
368
|
+
}, delaySec * 1000);
|
|
369
|
+
} else if (
|
|
370
|
+
mode === "conversation" &&
|
|
371
|
+
ctx.provider &&
|
|
372
|
+
shouldStartListeningAfterInitialMessage(ctx)
|
|
373
|
+
) {
|
|
374
|
+
transitionState(call, "listening");
|
|
375
|
+
persistCallRecord(ctx.storePath, call);
|
|
376
|
+
await ctx.provider.startListening({
|
|
377
|
+
callId: call.callId,
|
|
378
|
+
providerCallId,
|
|
379
|
+
});
|
|
380
|
+
}
|
|
381
|
+
} finally {
|
|
382
|
+
ctx.initialMessageInFlight.delete(call.callId);
|
|
274
383
|
}
|
|
275
384
|
}
|
|
276
385
|
|
|
@@ -316,7 +425,7 @@ export async function continueCall(
|
|
|
316
425
|
: 1;
|
|
317
426
|
|
|
318
427
|
call.metadata = {
|
|
319
|
-
...
|
|
428
|
+
...call.metadata,
|
|
320
429
|
turnCount,
|
|
321
430
|
lastTurnLatencyMs,
|
|
322
431
|
lastTurnListenWaitMs,
|
|
@@ -335,7 +444,7 @@ export async function continueCall(
|
|
|
335
444
|
|
|
336
445
|
return { success: true, transcript };
|
|
337
446
|
} catch (err) {
|
|
338
|
-
return { success: false, error:
|
|
447
|
+
return { success: false, error: formatErrorMessage(err) };
|
|
339
448
|
} finally {
|
|
340
449
|
ctx.activeTurnCalls.delete(callId);
|
|
341
450
|
clearTranscriptWaiter(ctx, callId);
|
|
@@ -345,6 +454,7 @@ export async function continueCall(
|
|
|
345
454
|
export async function endCall(
|
|
346
455
|
ctx: EndCallContext,
|
|
347
456
|
callId: CallId,
|
|
457
|
+
options?: { reason?: EndReason },
|
|
348
458
|
): Promise<{ success: boolean; error?: string }> {
|
|
349
459
|
const lookup = lookupConnectedCall(ctx, callId);
|
|
350
460
|
if (lookup.kind === "error") {
|
|
@@ -354,27 +464,23 @@ export async function endCall(
|
|
|
354
464
|
return { success: true };
|
|
355
465
|
}
|
|
356
466
|
const { call, providerCallId, provider } = lookup;
|
|
467
|
+
const reason = options?.reason ?? "hangup-bot";
|
|
357
468
|
|
|
358
469
|
try {
|
|
359
470
|
await provider.hangupCall({
|
|
360
471
|
callId,
|
|
361
472
|
providerCallId,
|
|
362
|
-
reason
|
|
473
|
+
reason,
|
|
363
474
|
});
|
|
364
475
|
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
clearMaxDurationTimer(ctx, callId);
|
|
371
|
-
rejectTranscriptWaiter(ctx, callId, "Call ended: hangup-bot");
|
|
372
|
-
|
|
373
|
-
ctx.activeCalls.delete(callId);
|
|
374
|
-
ctx.providerCallIdMap.delete(providerCallId);
|
|
476
|
+
finalizeCall({
|
|
477
|
+
ctx,
|
|
478
|
+
call,
|
|
479
|
+
endReason: reason,
|
|
480
|
+
});
|
|
375
481
|
|
|
376
482
|
return { success: true };
|
|
377
483
|
} catch (err) {
|
|
378
|
-
return { success: false, error:
|
|
484
|
+
return { success: false, error: formatErrorMessage(err) };
|
|
379
485
|
}
|
|
380
486
|
}
|
package/src/manager/store.ts
CHANGED
|
@@ -3,13 +3,25 @@ import fsp from "node:fs/promises";
|
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
import { CallRecordSchema, TerminalStates, type CallId, type CallRecord } from "../types.js";
|
|
5
5
|
|
|
6
|
+
const pendingPersistWrites = new Set<Promise<void>>();
|
|
7
|
+
|
|
6
8
|
export function persistCallRecord(storePath: string, call: CallRecord): void {
|
|
7
9
|
const logPath = path.join(storePath, "calls.jsonl");
|
|
8
10
|
const line = `${JSON.stringify(call)}\n`;
|
|
9
11
|
// Fire-and-forget async write to avoid blocking event loop.
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
12
|
+
const write = fsp
|
|
13
|
+
.appendFile(logPath, line)
|
|
14
|
+
.catch((err) => {
|
|
15
|
+
console.error("[voice-call] Failed to persist call record:", err);
|
|
16
|
+
})
|
|
17
|
+
.finally(() => {
|
|
18
|
+
pendingPersistWrites.delete(write);
|
|
19
|
+
});
|
|
20
|
+
pendingPersistWrites.add(write);
|
|
21
|
+
}
|
|
22
|
+
|
|
23
|
+
export async function flushPendingCallRecordWritesForTest(): Promise<void> {
|
|
24
|
+
await Promise.allSettled(pendingPersistWrites);
|
|
13
25
|
}
|
|
14
26
|
|
|
15
27
|
export function loadActiveCallsFromStore(storePath: string): {
|
|
@@ -50,6 +62,9 @@ export function loadActiveCallsFromStore(storePath: string): {
|
|
|
50
62
|
const rejectedProviderCallIds = new Set<string>();
|
|
51
63
|
|
|
52
64
|
for (const [callId, call] of callMap) {
|
|
65
|
+
for (const eventId of call.processedEventIds) {
|
|
66
|
+
processedEventIds.add(eventId);
|
|
67
|
+
}
|
|
53
68
|
if (TerminalStates.has(call.state)) {
|
|
54
69
|
continue;
|
|
55
70
|
}
|
|
@@ -57,9 +72,6 @@ export function loadActiveCallsFromStore(storePath: string): {
|
|
|
57
72
|
if (call.providerCallId) {
|
|
58
73
|
providerCallIdMap.set(call.providerCallId, callId);
|
|
59
74
|
}
|
|
60
|
-
for (const eventId of call.processedEventIds) {
|
|
61
|
-
processedEventIds.add(eventId);
|
|
62
|
-
}
|
|
63
75
|
}
|
|
64
76
|
|
|
65
77
|
return { activeCalls, providerCallIdMap, processedEventIds, rejectedProviderCallIds };
|
|
package/src/manager/timers.test.ts
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
|
2
|
+
|
|
3
|
+
const { persistCallRecordMock } = vi.hoisted(() => ({
|
|
4
|
+
persistCallRecordMock: vi.fn(),
|
|
5
|
+
}));
|
|
6
|
+
|
|
7
|
+
vi.mock("./store.js", () => ({
|
|
8
|
+
persistCallRecord: persistCallRecordMock,
|
|
9
|
+
}));
|
|
10
|
+
|
|
11
|
+
import {
|
|
12
|
+
clearMaxDurationTimer,
|
|
13
|
+
clearTranscriptWaiter,
|
|
14
|
+
rejectTranscriptWaiter,
|
|
15
|
+
resolveTranscriptWaiter,
|
|
16
|
+
startMaxDurationTimer,
|
|
17
|
+
waitForFinalTranscript,
|
|
18
|
+
} from "./timers.js";
|
|
19
|
+
|
|
20
|
+
describe("voice-call manager timers", () => {
|
|
21
|
+
beforeEach(() => {
|
|
22
|
+
vi.useFakeTimers();
|
|
23
|
+
vi.clearAllMocks();
|
|
24
|
+
});
|
|
25
|
+
|
|
26
|
+
afterEach(() => {
|
|
27
|
+
vi.useRealTimers();
|
|
28
|
+
});
|
|
29
|
+
|
|
30
|
+
it("starts and clears max duration timers, persisting timeout metadata before delegation", async () => {
|
|
31
|
+
const call = { id: "call-1", state: "active" };
|
|
32
|
+
const ctx = {
|
|
33
|
+
activeCalls: new Map([["call-1", call]]),
|
|
34
|
+
maxDurationTimers: new Map(),
|
|
35
|
+
config: { maxDurationSeconds: 5 },
|
|
36
|
+
storePath: "/tmp/voice-call",
|
|
37
|
+
};
|
|
38
|
+
const onTimeout = vi.fn(async () => {});
|
|
39
|
+
|
|
40
|
+
startMaxDurationTimer({
|
|
41
|
+
ctx: ctx as never,
|
|
42
|
+
callId: "call-1",
|
|
43
|
+
onTimeout,
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
expect(ctx.maxDurationTimers.has("call-1")).toBe(true);
|
|
47
|
+
|
|
48
|
+
await vi.advanceTimersByTimeAsync(5_000);
|
|
49
|
+
|
|
50
|
+
expect(call).toEqual({ id: "call-1", state: "active", endReason: "timeout" });
|
|
51
|
+
expect(persistCallRecordMock).toHaveBeenCalledWith("/tmp/voice-call", call);
|
|
52
|
+
expect(onTimeout).toHaveBeenCalledWith("call-1");
|
|
53
|
+
expect(ctx.maxDurationTimers.has("call-1")).toBe(false);
|
|
54
|
+
|
|
55
|
+
startMaxDurationTimer({
|
|
56
|
+
ctx: ctx as never,
|
|
57
|
+
callId: "call-1",
|
|
58
|
+
onTimeout,
|
|
59
|
+
});
|
|
60
|
+
clearMaxDurationTimer(ctx as never, "call-1");
|
|
61
|
+
expect(ctx.maxDurationTimers.has("call-1")).toBe(false);
|
|
62
|
+
});
|
|
63
|
+
|
|
64
|
+
it("does not time out terminal calls", async () => {
|
|
65
|
+
const ctx = {
|
|
66
|
+
activeCalls: new Map([["call-1", { id: "call-1", state: "completed" }]]),
|
|
67
|
+
maxDurationTimers: new Map(),
|
|
68
|
+
config: { maxDurationSeconds: 5 },
|
|
69
|
+
storePath: "/tmp/voice-call",
|
|
70
|
+
};
|
|
71
|
+
const onTimeout = vi.fn(async () => {});
|
|
72
|
+
|
|
73
|
+
startMaxDurationTimer({
|
|
74
|
+
ctx: ctx as never,
|
|
75
|
+
callId: "call-1",
|
|
76
|
+
onTimeout,
|
|
77
|
+
});
|
|
78
|
+
|
|
79
|
+
await vi.advanceTimersByTimeAsync(5_000);
|
|
80
|
+
|
|
81
|
+
expect(persistCallRecordMock).not.toHaveBeenCalled();
|
|
82
|
+
expect(onTimeout).not.toHaveBeenCalled();
|
|
83
|
+
});
|
|
84
|
+
|
|
85
|
+
it("waits for transcripts, resolves matching tokens, rejects mismatches and timeouts", async () => {
|
|
86
|
+
const ctx = {
|
|
87
|
+
transcriptWaiters: new Map(),
|
|
88
|
+
config: { transcriptTimeoutMs: 1_000 },
|
|
89
|
+
};
|
|
90
|
+
|
|
91
|
+
const pending = waitForFinalTranscript(ctx as never, "call-1", "turn-1");
|
|
92
|
+
expect(resolveTranscriptWaiter(ctx as never, "call-1", "ignored", "turn-2")).toBe(false);
|
|
93
|
+
expect(resolveTranscriptWaiter(ctx as never, "call-1", "final transcript", "turn-1")).toBe(
|
|
94
|
+
true,
|
|
95
|
+
);
|
|
96
|
+
await expect(pending).resolves.toBe("final transcript");
|
|
97
|
+
|
|
98
|
+
const another = waitForFinalTranscript(ctx as never, "call-2");
|
|
99
|
+
rejectTranscriptWaiter(ctx as never, "call-2", "provider failed");
|
|
100
|
+
await expect(another).rejects.toThrow("provider failed");
|
|
101
|
+
|
|
102
|
+
const timedOut = waitForFinalTranscript(ctx as never, "call-3").catch((error) => error);
|
|
103
|
+
await vi.advanceTimersByTimeAsync(1_000);
|
|
104
|
+
await expect(timedOut).resolves.toEqual(
|
|
105
|
+
expect.objectContaining({
|
|
106
|
+
message: "Timed out waiting for transcript after 1000ms",
|
|
107
|
+
}),
|
|
108
|
+
);
|
|
109
|
+
|
|
110
|
+
const toClear = waitForFinalTranscript(ctx as never, "call-4");
|
|
111
|
+
clearTranscriptWaiter(ctx as never, "call-4");
|
|
112
|
+
expect(ctx.transcriptWaiters.has("call-4")).toBe(false);
|
|
113
|
+
void toClear.catch(() => {});
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
it("rejects duplicate transcript waiters for the same call", async () => {
|
|
117
|
+
const ctx = {
|
|
118
|
+
transcriptWaiters: new Map(),
|
|
119
|
+
config: { transcriptTimeoutMs: 1_000 },
|
|
120
|
+
};
|
|
121
|
+
|
|
122
|
+
const pending = waitForFinalTranscript(ctx as never, "call-1");
|
|
123
|
+
await expect(waitForFinalTranscript(ctx as never, "call-1")).rejects.toThrow(
|
|
124
|
+
"Already waiting for transcript",
|
|
125
|
+
);
|
|
126
|
+
rejectTranscriptWaiter(ctx as never, "call-1", "done");
|
|
127
|
+
await expect(pending).rejects.toThrow("done");
|
|
128
|
+
});
|
|
129
|
+
});
|
package/src/manager/timers.ts
CHANGED
|
@@ -27,12 +27,13 @@ export function startMaxDurationTimer(params: {
|
|
|
27
27
|
ctx: MaxDurationTimerContext;
|
|
28
28
|
callId: CallId;
|
|
29
29
|
onTimeout: (callId: CallId) => Promise<void>;
|
|
30
|
+
timeoutMs?: number;
|
|
30
31
|
}): void {
|
|
31
32
|
clearMaxDurationTimer(params.ctx, params.callId);
|
|
32
33
|
|
|
33
|
-
const maxDurationMs = params.ctx.config.maxDurationSeconds * 1000;
|
|
34
|
+
const maxDurationMs = params.timeoutMs ?? params.ctx.config.maxDurationSeconds * 1000;
|
|
34
35
|
console.log(
|
|
35
|
-
`[voice-call] Starting max duration timer (${
|
|
36
|
+
`[voice-call] Starting max duration timer (${Math.ceil(maxDurationMs / 1000)}s) for call ${params.callId}`,
|
|
36
37
|
);
|
|
37
38
|
|
|
38
39
|
const timer = setTimeout(async () => {
|
|
@@ -40,7 +41,7 @@ export function startMaxDurationTimer(params: {
|
|
|
40
41
|
const call = params.ctx.activeCalls.get(params.callId);
|
|
41
42
|
if (call && !TerminalStates.has(call.state)) {
|
|
42
43
|
console.log(
|
|
43
|
-
`[voice-call] Max duration reached (${
|
|
44
|
+
`[voice-call] Max duration reached (${Math.ceil(maxDurationMs / 1000)}s), ending call ${params.callId}`,
|
|
44
45
|
);
|
|
45
46
|
call.endReason = "timeout";
|
|
46
47
|
persistCallRecord(params.ctx.storePath, call);
|
|
package/src/manager/twiml.test.ts
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { generateNotifyTwiml } from "./twiml.js";
|
|
3
|
+
|
|
4
|
+
describe("generateNotifyTwiml", () => {
|
|
5
|
+
it("renders escaped xml with the requested voice", () => {
|
|
6
|
+
expect(generateNotifyTwiml(`Call <ended> & "logged"`, "Polly.Joanna"))
|
|
7
|
+
.toBe(`<?xml version="1.0" encoding="UTF-8"?>
|
|
8
|
+
<Response>
|
|
9
|
+
<Say voice="Polly.Joanna">Call &lt;ended&gt; &amp; &quot;logged&quot;</Say>
|
|
10
|
+
<Hangup/>
|
|
11
|
+
</Response>`);
|
|
12
|
+
});
|
|
13
|
+
});
|
package/src/manager/twiml.ts
CHANGED
|
@@ -7,3 +7,11 @@ export function generateNotifyTwiml(message: string, voice: string): string {
|
|
|
7
7
|
<Hangup/>
|
|
8
8
|
</Response>`;
|
|
9
9
|
}
|
|
10
|
+
|
|
11
|
+
export function generateDtmfRedirectTwiml(digits: string, webhookUrl: string): string {
|
|
12
|
+
return `<?xml version="1.0" encoding="UTF-8"?>
|
|
13
|
+
<Response>
|
|
14
|
+
<Play digits="${escapeXml(digits)}" />
|
|
15
|
+
<Redirect method="POST">${escapeXml(webhookUrl)}</Redirect>
|
|
16
|
+
</Response>`;
|
|
17
|
+
}
|
|
package/src/manager.closed-loop.test.ts
CHANGED
|
@@ -1,6 +1,25 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
2
|
import { createManagerHarness, FakeProvider, markCallAnswered } from "./manager.test-harness.js";
|
|
3
3
|
|
|
4
|
+
function requireCall(
|
|
5
|
+
manager: Awaited<ReturnType<typeof createManagerHarness>>["manager"],
|
|
6
|
+
callId: string,
|
|
7
|
+
) {
|
|
8
|
+
const call = manager.getCall(callId);
|
|
9
|
+
if (!call) {
|
|
10
|
+
throw new Error(`expected active call ${callId}`);
|
|
11
|
+
}
|
|
12
|
+
return call;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
function requireTurnToken(provider: Awaited<ReturnType<typeof createManagerHarness>>["provider"]) {
|
|
16
|
+
const firstStart = provider.startListeningCalls[0];
|
|
17
|
+
if (!firstStart?.turnToken) {
|
|
18
|
+
throw new Error("expected closed-loop turn to capture a turn token");
|
|
19
|
+
}
|
|
20
|
+
return firstStart.turnToken;
|
|
21
|
+
}
|
|
22
|
+
|
|
4
23
|
describe("CallManager closed-loop turns", () => {
|
|
5
24
|
it("completes a closed-loop turn without live audio", async () => {
|
|
6
25
|
const { manager, provider } = await createManagerHarness({
|
|
@@ -31,12 +50,12 @@ describe("CallManager closed-loop turns", () => {
|
|
|
31
50
|
expect(provider.startListeningCalls).toHaveLength(1);
|
|
32
51
|
expect(provider.stopListeningCalls).toHaveLength(1);
|
|
33
52
|
|
|
34
|
-
const call = manager
|
|
35
|
-
expect(call
|
|
53
|
+
const call = requireCall(manager, started.callId);
|
|
54
|
+
expect(call.transcript.map((entry) => entry.text)).toEqual([
|
|
36
55
|
"How can I help?",
|
|
37
56
|
"Please check status",
|
|
38
57
|
]);
|
|
39
|
-
const metadata =
|
|
58
|
+
const metadata = call.metadata ?? {};
|
|
40
59
|
expect(typeof metadata.lastTurnLatencyMs).toBe("number");
|
|
41
60
|
expect(typeof metadata.lastTurnListenWaitMs).toBe("number");
|
|
42
61
|
expect(metadata.turnCount).toBe(1);
|
|
@@ -90,8 +109,7 @@ describe("CallManager closed-loop turns", () => {
|
|
|
90
109
|
const turnPromise = manager.continueCall(started.callId, "Prompt");
|
|
91
110
|
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
92
111
|
|
|
93
|
-
const expectedTurnToken = provider
|
|
94
|
-
expect(typeof expectedTurnToken).toBe("string");
|
|
112
|
+
const expectedTurnToken = requireTurnToken(provider);
|
|
95
113
|
|
|
96
114
|
manager.processEvent({
|
|
97
115
|
id: "evt-turn-token-bad",
|
|
@@ -125,8 +143,8 @@ describe("CallManager closed-loop turns", () => {
|
|
|
125
143
|
expect(turnResult.success).toBe(true);
|
|
126
144
|
expect(turnResult.transcript).toBe("final answer");
|
|
127
145
|
|
|
128
|
-
const call = manager
|
|
129
|
-
expect(call
|
|
146
|
+
const call = requireCall(manager, started.callId);
|
|
147
|
+
expect(call.transcript.map((entry) => entry.text)).toEqual(["Prompt", "final answer"]);
|
|
130
148
|
});
|
|
131
149
|
|
|
132
150
|
it("tracks latency metadata across multiple closed-loop turns", async () => {
|
|
@@ -167,14 +185,14 @@ describe("CallManager closed-loop turns", () => {
|
|
|
167
185
|
|
|
168
186
|
expect(secondResult.success).toBe(true);
|
|
169
187
|
|
|
170
|
-
const call = manager
|
|
171
|
-
expect(call
|
|
188
|
+
const call = requireCall(manager, started.callId);
|
|
189
|
+
expect(call.transcript.map((entry) => entry.text)).toEqual([
|
|
172
190
|
"First question",
|
|
173
191
|
"First answer",
|
|
174
192
|
"Second question",
|
|
175
193
|
"Second answer",
|
|
176
194
|
]);
|
|
177
|
-
const metadata =
|
|
195
|
+
const metadata = call.metadata ?? {};
|
|
178
196
|
expect(metadata.turnCount).toBe(2);
|
|
179
197
|
expect(typeof metadata.lastTurnLatencyMs).toBe("number");
|
|
180
198
|
expect(typeof metadata.lastTurnListenWaitMs).toBe("number");
|
|
@@ -209,8 +227,8 @@ describe("CallManager closed-loop turns", () => {
|
|
|
209
227
|
expect(result.transcript).toBe(`Answer ${i}`);
|
|
210
228
|
}
|
|
211
229
|
|
|
212
|
-
const call = manager
|
|
213
|
-
const metadata =
|
|
230
|
+
const call = requireCall(manager, started.callId);
|
|
231
|
+
const metadata = call.metadata ?? {};
|
|
214
232
|
expect(metadata.turnCount).toBe(5);
|
|
215
233
|
expect(provider.startListeningCalls).toHaveLength(5);
|
|
216
234
|
expect(provider.stopListeningCalls).toHaveLength(5);
|