@openclaw/voice-call 2026.2.14 → 2026.2.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/README.md +20 -0
- package/index.ts +2 -2
- package/package.json +1 -1
- package/src/cli.ts +92 -1
- package/src/config.test.ts +5 -1
- package/src/config.ts +15 -75
- package/src/manager/context.ts +1 -0
- package/src/manager/events.test.ts +26 -13
- package/src/manager/events.ts +1 -1
- package/src/manager/outbound.ts +36 -2
- package/src/manager/timers.ts +4 -3
- package/src/manager.test.ts +225 -4
- package/src/manager.ts +4 -2
- package/src/media-stream.test.ts +1 -1
- package/src/providers/plivo.ts +1 -1
- package/src/providers/telnyx.test.ts +33 -48
- package/src/providers/telnyx.ts +1 -1
- package/src/providers/twilio/webhook.ts +1 -1
- package/src/providers/twilio.ts +2 -2
- package/src/runtime.ts +3 -3
- package/src/webhook.test.ts +118 -0
- package/src/webhook.ts +51 -2
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,23 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 2026.2.17
|
|
4
|
+
|
|
5
|
+
### Changes
|
|
6
|
+
|
|
7
|
+
- Version alignment with core OpenClaw release numbers.
|
|
8
|
+
|
|
9
|
+
## 2026.2.16
|
|
10
|
+
|
|
11
|
+
### Changes
|
|
12
|
+
|
|
13
|
+
- Version alignment with core OpenClaw release numbers.
|
|
14
|
+
|
|
15
|
+
## 2026.2.15
|
|
16
|
+
|
|
17
|
+
### Changes
|
|
18
|
+
|
|
19
|
+
- Version alignment with core OpenClaw release numbers.
|
|
20
|
+
|
|
3
21
|
## 2026.2.14
|
|
4
22
|
|
|
5
23
|
### Changes
|
package/README.md
CHANGED
|
@@ -87,6 +87,26 @@ Notes:
|
|
|
87
87
|
- Telnyx requires `telnyx.publicKey` (or `TELNYX_PUBLIC_KEY`) unless `skipSignatureVerification` is true.
|
|
88
88
|
- `tunnel.allowNgrokFreeTierLoopbackBypass: true` allows Twilio webhooks with invalid signatures **only** when `tunnel.provider="ngrok"` and `serve.bind` is loopback (ngrok local agent). Use for local dev only.
|
|
89
89
|
|
|
90
|
+
## Stale call reaper
|
|
91
|
+
|
|
92
|
+
Use `staleCallReaperSeconds` to end calls that never receive a terminal webhook
|
|
93
|
+
(for example, notify-mode calls that never complete). The default is `0`
|
|
94
|
+
(disabled).
|
|
95
|
+
|
|
96
|
+
Recommended ranges:
|
|
97
|
+
|
|
98
|
+
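- **Production:** `120`–`300` seconds for notify-style flows whose `maxDurationSeconds` is set lower than the chosen value (the `maxDurationSeconds` default is `300`).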
|
|
99
|
+
- Keep this value **higher than `maxDurationSeconds`** so normal calls can
|
|
100
|
+
finish. A good starting point is `maxDurationSeconds + 30–60` seconds.
|
|
101
|
+
|
|
102
|
+
Example:
|
|
103
|
+
|
|
104
|
+
```json5
|
|
105
|
+
{
|
|
106
|
+
staleCallReaperSeconds: 360,
|
|
107
|
+
}
|
|
108
|
+
```
|
|
109
|
+
|
|
90
110
|
## TTS for calls
|
|
91
111
|
|
|
92
112
|
Voice Call uses the core `messages.tts` configuration (OpenAI or ElevenLabs) for
|
package/index.ts
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
|
-
import type { GatewayRequestHandlerOptions, OpenClawPluginApi } from "openclaw/plugin-sdk";
|
|
2
1
|
import { Type } from "@sinclair/typebox";
|
|
3
|
-
import type {
|
|
2
|
+
import type { GatewayRequestHandlerOptions, OpenClawPluginApi } from "openclaw/plugin-sdk";
|
|
4
3
|
import { registerVoiceCallCli } from "./src/cli.js";
|
|
5
4
|
import {
|
|
6
5
|
VoiceCallConfigSchema,
|
|
@@ -8,6 +7,7 @@ import {
|
|
|
8
7
|
validateProviderConfig,
|
|
9
8
|
type VoiceCallConfig,
|
|
10
9
|
} from "./src/config.js";
|
|
10
|
+
import type { CoreConfig } from "./src/core-bridge.js";
|
|
11
11
|
import { createVoiceCallRuntime, type VoiceCallRuntime } from "./src/runtime.js";
|
|
12
12
|
|
|
13
13
|
const voiceCallConfigSchema = {
|
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
|
-
import type { Command } from "commander";
|
|
2
1
|
import fs from "node:fs";
|
|
3
2
|
import os from "node:os";
|
|
4
3
|
import path from "node:path";
|
|
4
|
+
import type { Command } from "commander";
|
|
5
5
|
import { sleep } from "openclaw/plugin-sdk";
|
|
6
6
|
import type { VoiceCallConfig } from "./config.js";
|
|
7
7
|
import type { VoiceCallRuntime } from "./runtime.js";
|
|
@@ -41,6 +41,46 @@ function resolveDefaultStorePath(config: VoiceCallConfig): string {
|
|
|
41
41
|
return path.join(base, "calls.jsonl");
|
|
42
42
|
}
|
|
43
43
|
|
|
44
|
+
function percentile(values: number[], p: number): number {
|
|
45
|
+
if (values.length === 0) {
|
|
46
|
+
return 0;
|
|
47
|
+
}
|
|
48
|
+
const sorted = [...values].sort((a, b) => a - b);
|
|
49
|
+
const idx = Math.min(sorted.length - 1, Math.max(0, Math.ceil((p / 100) * sorted.length) - 1));
|
|
50
|
+
return sorted[idx] ?? 0;
|
|
51
|
+
}
|
|
52
|
+
|
|
53
|
+
function summarizeSeries(values: number[]): {
|
|
54
|
+
count: number;
|
|
55
|
+
minMs: number;
|
|
56
|
+
maxMs: number;
|
|
57
|
+
avgMs: number;
|
|
58
|
+
p50Ms: number;
|
|
59
|
+
p95Ms: number;
|
|
60
|
+
} {
|
|
61
|
+
if (values.length === 0) {
|
|
62
|
+
return { count: 0, minMs: 0, maxMs: 0, avgMs: 0, p50Ms: 0, p95Ms: 0 };
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
const minMs = values.reduce(
|
|
66
|
+
(min, value) => (value < min ? value : min),
|
|
67
|
+
Number.POSITIVE_INFINITY,
|
|
68
|
+
);
|
|
69
|
+
const maxMs = values.reduce(
|
|
70
|
+
(max, value) => (value > max ? value : max),
|
|
71
|
+
Number.NEGATIVE_INFINITY,
|
|
72
|
+
);
|
|
73
|
+
const avgMs = values.reduce((sum, value) => sum + value, 0) / values.length;
|
|
74
|
+
return {
|
|
75
|
+
count: values.length,
|
|
76
|
+
minMs,
|
|
77
|
+
maxMs,
|
|
78
|
+
avgMs,
|
|
79
|
+
p50Ms: percentile(values, 50),
|
|
80
|
+
p95Ms: percentile(values, 95),
|
|
81
|
+
};
|
|
82
|
+
}
|
|
83
|
+
|
|
44
84
|
export function registerVoiceCallCli(params: {
|
|
45
85
|
program: Command;
|
|
46
86
|
config: VoiceCallConfig;
|
|
@@ -216,6 +256,57 @@ export function registerVoiceCallCli(params: {
|
|
|
216
256
|
}
|
|
217
257
|
});
|
|
218
258
|
|
|
259
|
+
root
|
|
260
|
+
.command("latency")
|
|
261
|
+
.description("Summarize turn latency metrics from voice-call JSONL logs")
|
|
262
|
+
.option("--file <path>", "Path to calls.jsonl", resolveDefaultStorePath(config))
|
|
263
|
+
.option("--last <n>", "Analyze last N records", "200")
|
|
264
|
+
.action(async (options: { file: string; last?: string }) => {
|
|
265
|
+
const file = options.file;
|
|
266
|
+
const last = Math.max(1, Number(options.last ?? 200));
|
|
267
|
+
|
|
268
|
+
if (!fs.existsSync(file)) {
|
|
269
|
+
throw new Error("No log file at " + file);
|
|
270
|
+
}
|
|
271
|
+
|
|
272
|
+
const content = fs.readFileSync(file, "utf8");
|
|
273
|
+
const lines = content.split("\n").filter(Boolean).slice(-last);
|
|
274
|
+
|
|
275
|
+
const turnLatencyMs: number[] = [];
|
|
276
|
+
const listenWaitMs: number[] = [];
|
|
277
|
+
|
|
278
|
+
for (const line of lines) {
|
|
279
|
+
try {
|
|
280
|
+
const parsed = JSON.parse(line) as {
|
|
281
|
+
metadata?: { lastTurnLatencyMs?: unknown; lastTurnListenWaitMs?: unknown };
|
|
282
|
+
};
|
|
283
|
+
const latency = parsed.metadata?.lastTurnLatencyMs;
|
|
284
|
+
const listenWait = parsed.metadata?.lastTurnListenWaitMs;
|
|
285
|
+
if (typeof latency === "number" && Number.isFinite(latency)) {
|
|
286
|
+
turnLatencyMs.push(latency);
|
|
287
|
+
}
|
|
288
|
+
if (typeof listenWait === "number" && Number.isFinite(listenWait)) {
|
|
289
|
+
listenWaitMs.push(listenWait);
|
|
290
|
+
}
|
|
291
|
+
} catch {
|
|
292
|
+
// ignore malformed JSON lines
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
296
|
+
// eslint-disable-next-line no-console
|
|
297
|
+
console.log(
|
|
298
|
+
JSON.stringify(
|
|
299
|
+
{
|
|
300
|
+
recordsScanned: lines.length,
|
|
301
|
+
turnLatency: summarizeSeries(turnLatencyMs),
|
|
302
|
+
listenWait: summarizeSeries(listenWaitMs),
|
|
303
|
+
},
|
|
304
|
+
null,
|
|
305
|
+
2,
|
|
306
|
+
),
|
|
307
|
+
);
|
|
308
|
+
});
|
|
309
|
+
|
|
219
310
|
root
|
|
220
311
|
.command("expose")
|
|
221
312
|
.description("Enable/disable Tailscale serve/funnel for the webhook")
|
package/src/config.test.ts
CHANGED
|
@@ -10,6 +10,7 @@ function createBaseConfig(provider: "telnyx" | "twilio" | "plivo" | "mock"): Voi
|
|
|
10
10
|
allowFrom: [],
|
|
11
11
|
outbound: { defaultMode: "notify", notifyHangupDelaySec: 3 },
|
|
12
12
|
maxDurationSeconds: 300,
|
|
13
|
+
staleCallReaperSeconds: 600,
|
|
13
14
|
silenceTimeoutMs: 800,
|
|
14
15
|
transcriptTimeoutMs: 180000,
|
|
15
16
|
ringTimeoutMs: 30000,
|
|
@@ -32,7 +33,10 @@ function createBaseConfig(provider: "telnyx" | "twilio" | "plivo" | "mock"): Voi
|
|
|
32
33
|
},
|
|
33
34
|
skipSignatureVerification: false,
|
|
34
35
|
stt: { provider: "openai", model: "whisper-1" },
|
|
35
|
-
tts: {
|
|
36
|
+
tts: {
|
|
37
|
+
provider: "openai",
|
|
38
|
+
openai: { model: "gpt-4o-mini-tts", voice: "coral" },
|
|
39
|
+
},
|
|
36
40
|
responseModel: "openai/gpt-4o-mini",
|
|
37
41
|
responseTimeoutMs: 30000,
|
|
38
42
|
};
|
package/src/config.ts
CHANGED
|
@@ -1,3 +1,9 @@
|
|
|
1
|
+
import {
|
|
2
|
+
TtsAutoSchema,
|
|
3
|
+
TtsConfigSchema,
|
|
4
|
+
TtsModeSchema,
|
|
5
|
+
TtsProviderSchema,
|
|
6
|
+
} from "openclaw/plugin-sdk";
|
|
1
7
|
import { z } from "zod";
|
|
2
8
|
|
|
3
9
|
// -----------------------------------------------------------------------------
|
|
@@ -77,81 +83,7 @@ export const SttConfigSchema = z
|
|
|
77
83
|
.default({ provider: "openai", model: "whisper-1" });
|
|
78
84
|
export type SttConfig = z.infer<typeof SttConfigSchema>;
|
|
79
85
|
|
|
80
|
-
export
|
|
81
|
-
export const TtsModeSchema = z.enum(["final", "all"]);
|
|
82
|
-
export const TtsAutoSchema = z.enum(["off", "always", "inbound", "tagged"]);
|
|
83
|
-
|
|
84
|
-
export const TtsConfigSchema = z
|
|
85
|
-
.object({
|
|
86
|
-
auto: TtsAutoSchema.optional(),
|
|
87
|
-
enabled: z.boolean().optional(),
|
|
88
|
-
mode: TtsModeSchema.optional(),
|
|
89
|
-
provider: TtsProviderSchema.optional(),
|
|
90
|
-
summaryModel: z.string().optional(),
|
|
91
|
-
modelOverrides: z
|
|
92
|
-
.object({
|
|
93
|
-
enabled: z.boolean().optional(),
|
|
94
|
-
allowText: z.boolean().optional(),
|
|
95
|
-
allowProvider: z.boolean().optional(),
|
|
96
|
-
allowVoice: z.boolean().optional(),
|
|
97
|
-
allowModelId: z.boolean().optional(),
|
|
98
|
-
allowVoiceSettings: z.boolean().optional(),
|
|
99
|
-
allowNormalization: z.boolean().optional(),
|
|
100
|
-
allowSeed: z.boolean().optional(),
|
|
101
|
-
})
|
|
102
|
-
.strict()
|
|
103
|
-
.optional(),
|
|
104
|
-
elevenlabs: z
|
|
105
|
-
.object({
|
|
106
|
-
apiKey: z.string().optional(),
|
|
107
|
-
baseUrl: z.string().optional(),
|
|
108
|
-
voiceId: z.string().optional(),
|
|
109
|
-
modelId: z.string().optional(),
|
|
110
|
-
seed: z.number().int().min(0).max(4294967295).optional(),
|
|
111
|
-
applyTextNormalization: z.enum(["auto", "on", "off"]).optional(),
|
|
112
|
-
languageCode: z.string().optional(),
|
|
113
|
-
voiceSettings: z
|
|
114
|
-
.object({
|
|
115
|
-
stability: z.number().min(0).max(1).optional(),
|
|
116
|
-
similarityBoost: z.number().min(0).max(1).optional(),
|
|
117
|
-
style: z.number().min(0).max(1).optional(),
|
|
118
|
-
useSpeakerBoost: z.boolean().optional(),
|
|
119
|
-
speed: z.number().min(0.5).max(2).optional(),
|
|
120
|
-
})
|
|
121
|
-
.strict()
|
|
122
|
-
.optional(),
|
|
123
|
-
})
|
|
124
|
-
.strict()
|
|
125
|
-
.optional(),
|
|
126
|
-
openai: z
|
|
127
|
-
.object({
|
|
128
|
-
apiKey: z.string().optional(),
|
|
129
|
-
model: z.string().optional(),
|
|
130
|
-
voice: z.string().optional(),
|
|
131
|
-
})
|
|
132
|
-
.strict()
|
|
133
|
-
.optional(),
|
|
134
|
-
edge: z
|
|
135
|
-
.object({
|
|
136
|
-
enabled: z.boolean().optional(),
|
|
137
|
-
voice: z.string().optional(),
|
|
138
|
-
lang: z.string().optional(),
|
|
139
|
-
outputFormat: z.string().optional(),
|
|
140
|
-
pitch: z.string().optional(),
|
|
141
|
-
rate: z.string().optional(),
|
|
142
|
-
volume: z.string().optional(),
|
|
143
|
-
saveSubtitles: z.boolean().optional(),
|
|
144
|
-
proxy: z.string().optional(),
|
|
145
|
-
timeoutMs: z.number().int().min(1000).max(120000).optional(),
|
|
146
|
-
})
|
|
147
|
-
.strict()
|
|
148
|
-
.optional(),
|
|
149
|
-
prefsPath: z.string().optional(),
|
|
150
|
-
maxTextLength: z.number().int().min(1).optional(),
|
|
151
|
-
timeoutMs: z.number().int().min(1000).max(120000).optional(),
|
|
152
|
-
})
|
|
153
|
-
.strict()
|
|
154
|
-
.optional();
|
|
86
|
+
export { TtsAutoSchema, TtsConfigSchema, TtsModeSchema, TtsProviderSchema };
|
|
155
87
|
export type VoiceCallTtsConfig = z.infer<typeof TtsConfigSchema>;
|
|
156
88
|
|
|
157
89
|
// -----------------------------------------------------------------------------
|
|
@@ -341,6 +273,14 @@ export const VoiceCallConfigSchema = z
|
|
|
341
273
|
/** Maximum call duration in seconds */
|
|
342
274
|
maxDurationSeconds: z.number().int().positive().default(300),
|
|
343
275
|
|
|
276
|
+
/**
|
|
277
|
+
* Maximum age of a call in seconds before it is automatically reaped.
|
|
278
|
+
* Catches calls stuck in unexpected states (e.g., notify-mode calls that
|
|
279
|
+
* never receive a terminal webhook). Set to 0 to disable.
|
|
280
|
+
* Default: 0 (disabled). Recommended: 120-300 for production, provided the value stays above maxDurationSeconds so normal calls are not reaped.
|
|
281
|
+
*/
|
|
282
|
+
staleCallReaperSeconds: z.number().int().nonnegative().default(0),
|
|
283
|
+
|
|
344
284
|
/** Silence timeout for end-of-speech detection (ms) */
|
|
345
285
|
silenceTimeoutMs: z.number().int().positive().default(800),
|
|
346
286
|
|
package/src/manager/context.ts
CHANGED
(hunk not captured in this listing)
package/src/manager/events.test.ts
CHANGED
|
@@ -2,9 +2,10 @@ import fs from "node:fs";
|
|
|
2
2
|
import os from "node:os";
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
import { describe, expect, it } from "vitest";
|
|
5
|
+
import { VoiceCallConfigSchema } from "../config.js";
|
|
6
|
+
import type { VoiceCallProvider } from "../providers/base.js";
|
|
5
7
|
import type { HangupCallInput, NormalizedEvent } from "../types.js";
|
|
6
8
|
import type { CallManagerContext } from "./context.js";
|
|
7
|
-
import { VoiceCallConfigSchema } from "../config.js";
|
|
8
9
|
import { processEvent } from "./events.js";
|
|
9
10
|
|
|
10
11
|
function createContext(overrides: Partial<CallManagerContext> = {}): CallManagerContext {
|
|
@@ -23,21 +24,35 @@ function createContext(overrides: Partial<CallManagerContext> = {}): CallManager
|
|
|
23
24
|
}),
|
|
24
25
|
storePath,
|
|
25
26
|
webhookUrl: null,
|
|
27
|
+
activeTurnCalls: new Set(),
|
|
26
28
|
transcriptWaiters: new Map(),
|
|
27
29
|
maxDurationTimers: new Map(),
|
|
28
30
|
...overrides,
|
|
29
31
|
};
|
|
30
32
|
}
|
|
31
33
|
|
|
34
|
+
function createProvider(overrides: Partial<VoiceCallProvider> = {}): VoiceCallProvider {
|
|
35
|
+
return {
|
|
36
|
+
name: "plivo",
|
|
37
|
+
verifyWebhook: () => ({ ok: true }),
|
|
38
|
+
parseWebhookEvent: () => ({ events: [] }),
|
|
39
|
+
initiateCall: async () => ({ providerCallId: "provider-call-id", status: "initiated" }),
|
|
40
|
+
hangupCall: async () => {},
|
|
41
|
+
playTts: async () => {},
|
|
42
|
+
startListening: async () => {},
|
|
43
|
+
stopListening: async () => {},
|
|
44
|
+
...overrides,
|
|
45
|
+
};
|
|
46
|
+
}
|
|
47
|
+
|
|
32
48
|
describe("processEvent (functional)", () => {
|
|
33
49
|
it("calls provider hangup when rejecting inbound call", () => {
|
|
34
50
|
const hangupCalls: HangupCallInput[] = [];
|
|
35
|
-
const provider = {
|
|
36
|
-
|
|
37
|
-
async hangupCall(input: HangupCallInput): Promise<void> {
|
|
51
|
+
const provider = createProvider({
|
|
52
|
+
hangupCall: async (input: HangupCallInput): Promise<void> => {
|
|
38
53
|
hangupCalls.push(input);
|
|
39
54
|
},
|
|
40
|
-
};
|
|
55
|
+
});
|
|
41
56
|
|
|
42
57
|
const ctx = createContext({
|
|
43
58
|
config: VoiceCallConfigSchema.parse({
|
|
@@ -98,12 +113,11 @@ describe("processEvent (functional)", () => {
|
|
|
98
113
|
|
|
99
114
|
it("calls hangup only once for duplicate events for same rejected call", () => {
|
|
100
115
|
const hangupCalls: HangupCallInput[] = [];
|
|
101
|
-
const provider = {
|
|
102
|
-
|
|
103
|
-
async hangupCall(input: HangupCallInput): Promise<void> {
|
|
116
|
+
const provider = createProvider({
|
|
117
|
+
hangupCall: async (input: HangupCallInput): Promise<void> => {
|
|
104
118
|
hangupCalls.push(input);
|
|
105
119
|
},
|
|
106
|
-
};
|
|
120
|
+
});
|
|
107
121
|
const ctx = createContext({
|
|
108
122
|
config: VoiceCallConfigSchema.parse({
|
|
109
123
|
enabled: true,
|
|
@@ -208,12 +222,11 @@ describe("processEvent (functional)", () => {
|
|
|
208
222
|
});
|
|
209
223
|
|
|
210
224
|
it("when hangup throws, logs and does not throw", () => {
|
|
211
|
-
const provider = {
|
|
212
|
-
|
|
213
|
-
async hangupCall(): Promise<void> {
|
|
225
|
+
const provider = createProvider({
|
|
226
|
+
hangupCall: async (): Promise<void> => {
|
|
214
227
|
throw new Error("provider down");
|
|
215
228
|
},
|
|
216
|
-
};
|
|
229
|
+
});
|
|
217
230
|
const ctx = createContext({
|
|
218
231
|
config: VoiceCallConfigSchema.parse({
|
|
219
232
|
enabled: true,
|
package/src/manager/events.ts
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import crypto from "node:crypto";
|
|
2
|
+
import { isAllowlistedCaller, normalizePhoneNumber } from "../allowlist.js";
|
|
2
3
|
import type { CallRecord, CallState, NormalizedEvent } from "../types.js";
|
|
3
4
|
import type { CallManagerContext } from "./context.js";
|
|
4
|
-
import { isAllowlistedCaller, normalizePhoneNumber } from "../allowlist.js";
|
|
5
5
|
import { findCall } from "./lookup.js";
|
|
6
6
|
import { endCall } from "./outbound.js";
|
|
7
7
|
import { addTranscriptEntry, transitionState } from "./state.js";
|
package/src/manager/outbound.ts
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
import crypto from "node:crypto";
|
|
2
2
|
import type { CallMode } from "../config.js";
|
|
3
|
-
import type { CallManagerContext } from "./context.js";
|
|
4
3
|
import {
|
|
5
4
|
TerminalStates,
|
|
6
5
|
type CallId,
|
|
@@ -8,6 +7,7 @@ import {
|
|
|
8
7
|
type OutboundCallOptions,
|
|
9
8
|
} from "../types.js";
|
|
10
9
|
import { mapVoiceToPolly } from "../voice-mapping.js";
|
|
10
|
+
import type { CallManagerContext } from "./context.js";
|
|
11
11
|
import { getCallByProviderCallId } from "./lookup.js";
|
|
12
12
|
import { addTranscriptEntry, transitionState } from "./state.js";
|
|
13
13
|
import { persistCallRecord } from "./store.js";
|
|
@@ -36,6 +36,7 @@ type ConversationContext = Pick<
|
|
|
36
36
|
| "provider"
|
|
37
37
|
| "config"
|
|
38
38
|
| "storePath"
|
|
39
|
+
| "activeTurnCalls"
|
|
39
40
|
| "transcriptWaiters"
|
|
40
41
|
| "maxDurationTimers"
|
|
41
42
|
>;
|
|
@@ -158,7 +159,6 @@ export async function speak(
|
|
|
158
159
|
if (TerminalStates.has(call.state)) {
|
|
159
160
|
return { success: false, error: "Call has ended" };
|
|
160
161
|
}
|
|
161
|
-
|
|
162
162
|
try {
|
|
163
163
|
transitionState(call, "speaking");
|
|
164
164
|
persistCallRecord(ctx.storePath, call);
|
|
@@ -242,6 +242,12 @@ export async function continueCall(
|
|
|
242
242
|
if (TerminalStates.has(call.state)) {
|
|
243
243
|
return { success: false, error: "Call has ended" };
|
|
244
244
|
}
|
|
245
|
+
if (ctx.activeTurnCalls.has(callId) || ctx.transcriptWaiters.has(callId)) {
|
|
246
|
+
return { success: false, error: "Already waiting for transcript" };
|
|
247
|
+
}
|
|
248
|
+
ctx.activeTurnCalls.add(callId);
|
|
249
|
+
|
|
250
|
+
const turnStartedAt = Date.now();
|
|
245
251
|
|
|
246
252
|
try {
|
|
247
253
|
await speak(ctx, callId, prompt);
|
|
@@ -249,17 +255,45 @@ export async function continueCall(
|
|
|
249
255
|
transitionState(call, "listening");
|
|
250
256
|
persistCallRecord(ctx.storePath, call);
|
|
251
257
|
|
|
258
|
+
const listenStartedAt = Date.now();
|
|
252
259
|
await ctx.provider.startListening({ callId, providerCallId: call.providerCallId });
|
|
253
260
|
|
|
254
261
|
const transcript = await waitForFinalTranscript(ctx, callId);
|
|
262
|
+
const transcriptReceivedAt = Date.now();
|
|
255
263
|
|
|
256
264
|
// Best-effort: stop listening after final transcript.
|
|
257
265
|
await ctx.provider.stopListening({ callId, providerCallId: call.providerCallId });
|
|
258
266
|
|
|
267
|
+
const lastTurnLatencyMs = transcriptReceivedAt - turnStartedAt;
|
|
268
|
+
const lastTurnListenWaitMs = transcriptReceivedAt - listenStartedAt;
|
|
269
|
+
const turnCount =
|
|
270
|
+
call.metadata && typeof call.metadata.turnCount === "number"
|
|
271
|
+
? call.metadata.turnCount + 1
|
|
272
|
+
: 1;
|
|
273
|
+
|
|
274
|
+
call.metadata = {
|
|
275
|
+
...(call.metadata ?? {}),
|
|
276
|
+
turnCount,
|
|
277
|
+
lastTurnLatencyMs,
|
|
278
|
+
lastTurnListenWaitMs,
|
|
279
|
+
lastTurnCompletedAt: transcriptReceivedAt,
|
|
280
|
+
};
|
|
281
|
+
persistCallRecord(ctx.storePath, call);
|
|
282
|
+
|
|
283
|
+
console.log(
|
|
284
|
+
"[voice-call] continueCall latency call=" +
|
|
285
|
+
call.callId +
|
|
286
|
+
" totalMs=" +
|
|
287
|
+
String(lastTurnLatencyMs) +
|
|
288
|
+
" listenWaitMs=" +
|
|
289
|
+
String(lastTurnListenWaitMs),
|
|
290
|
+
);
|
|
291
|
+
|
|
259
292
|
return { success: true, transcript };
|
|
260
293
|
} catch (err) {
|
|
261
294
|
return { success: false, error: err instanceof Error ? err.message : String(err) };
|
|
262
295
|
} finally {
|
|
296
|
+
ctx.activeTurnCalls.delete(callId);
|
|
263
297
|
clearTranscriptWaiter(ctx, callId);
|
|
264
298
|
}
|
|
265
299
|
}
|
package/src/manager/timers.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import type { CallManagerContext } from "./context.js";
|
|
2
1
|
import { TerminalStates, type CallId } from "../types.js";
|
|
2
|
+
import type { CallManagerContext } from "./context.js";
|
|
3
3
|
import { persistCallRecord } from "./store.js";
|
|
4
4
|
|
|
5
5
|
type TimerContext = Pick<
|
|
@@ -87,8 +87,9 @@ export function resolveTranscriptWaiter(
|
|
|
87
87
|
}
|
|
88
88
|
|
|
89
89
|
export function waitForFinalTranscript(ctx: TimerContext, callId: CallId): Promise<string> {
|
|
90
|
-
|
|
91
|
-
|
|
90
|
+
if (ctx.transcriptWaiters.has(callId)) {
|
|
91
|
+
return Promise.reject(new Error("Already waiting for transcript"));
|
|
92
|
+
}
|
|
92
93
|
|
|
93
94
|
const timeoutMs = ctx.config.transcriptTimeoutMs;
|
|
94
95
|
return new Promise((resolve, reject) => {
|
package/src/manager.test.ts
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
import os from "node:os";
|
|
2
2
|
import path from "node:path";
|
|
3
3
|
import { describe, expect, it } from "vitest";
|
|
4
|
+
import { VoiceCallConfigSchema } from "./config.js";
|
|
5
|
+
import { CallManager } from "./manager.js";
|
|
4
6
|
import type { VoiceCallProvider } from "./providers/base.js";
|
|
5
7
|
import type {
|
|
6
8
|
HangupCallInput,
|
|
@@ -13,13 +15,13 @@ import type {
|
|
|
13
15
|
WebhookContext,
|
|
14
16
|
WebhookVerificationResult,
|
|
15
17
|
} from "./types.js";
|
|
16
|
-
import { VoiceCallConfigSchema } from "./config.js";
|
|
17
|
-
import { CallManager } from "./manager.js";
|
|
18
18
|
|
|
19
19
|
class FakeProvider implements VoiceCallProvider {
|
|
20
20
|
readonly name = "plivo" as const;
|
|
21
21
|
readonly playTtsCalls: PlayTtsInput[] = [];
|
|
22
22
|
readonly hangupCalls: HangupCallInput[] = [];
|
|
23
|
+
readonly startListeningCalls: StartListeningInput[] = [];
|
|
24
|
+
readonly stopListeningCalls: StopListeningInput[] = [];
|
|
23
25
|
|
|
24
26
|
verifyWebhook(_ctx: WebhookContext): WebhookVerificationResult {
|
|
25
27
|
return { ok: true };
|
|
@@ -36,8 +38,12 @@ class FakeProvider implements VoiceCallProvider {
|
|
|
36
38
|
async playTts(input: PlayTtsInput): Promise<void> {
|
|
37
39
|
this.playTtsCalls.push(input);
|
|
38
40
|
}
|
|
39
|
-
async startListening(
|
|
40
|
-
|
|
41
|
+
async startListening(input: StartListeningInput): Promise<void> {
|
|
42
|
+
this.startListeningCalls.push(input);
|
|
43
|
+
}
|
|
44
|
+
async stopListening(input: StopListeningInput): Promise<void> {
|
|
45
|
+
this.stopListeningCalls.push(input);
|
|
46
|
+
}
|
|
41
47
|
}
|
|
42
48
|
|
|
43
49
|
describe("CallManager", () => {
|
|
@@ -261,4 +267,219 @@ describe("CallManager", () => {
|
|
|
261
267
|
|
|
262
268
|
expect(manager.getCallByProviderCallId("provider-exact")).toBeDefined();
|
|
263
269
|
});
|
|
270
|
+
|
|
271
|
+
it("completes a closed-loop turn without live audio", async () => {
|
|
272
|
+
const config = VoiceCallConfigSchema.parse({
|
|
273
|
+
enabled: true,
|
|
274
|
+
provider: "plivo",
|
|
275
|
+
fromNumber: "+15550000000",
|
|
276
|
+
transcriptTimeoutMs: 5000,
|
|
277
|
+
});
|
|
278
|
+
|
|
279
|
+
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
280
|
+
const provider = new FakeProvider();
|
|
281
|
+
const manager = new CallManager(config, storePath);
|
|
282
|
+
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
283
|
+
|
|
284
|
+
const started = await manager.initiateCall("+15550000003");
|
|
285
|
+
expect(started.success).toBe(true);
|
|
286
|
+
|
|
287
|
+
manager.processEvent({
|
|
288
|
+
id: "evt-closed-loop-answered",
|
|
289
|
+
type: "call.answered",
|
|
290
|
+
callId: started.callId,
|
|
291
|
+
providerCallId: "request-uuid",
|
|
292
|
+
timestamp: Date.now(),
|
|
293
|
+
});
|
|
294
|
+
|
|
295
|
+
const turnPromise = manager.continueCall(started.callId, "How can I help?");
|
|
296
|
+
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
297
|
+
|
|
298
|
+
manager.processEvent({
|
|
299
|
+
id: "evt-closed-loop-speech",
|
|
300
|
+
type: "call.speech",
|
|
301
|
+
callId: started.callId,
|
|
302
|
+
providerCallId: "request-uuid",
|
|
303
|
+
timestamp: Date.now(),
|
|
304
|
+
transcript: "Please check status",
|
|
305
|
+
isFinal: true,
|
|
306
|
+
});
|
|
307
|
+
|
|
308
|
+
const turn = await turnPromise;
|
|
309
|
+
expect(turn.success).toBe(true);
|
|
310
|
+
expect(turn.transcript).toBe("Please check status");
|
|
311
|
+
expect(provider.startListeningCalls).toHaveLength(1);
|
|
312
|
+
expect(provider.stopListeningCalls).toHaveLength(1);
|
|
313
|
+
|
|
314
|
+
const call = manager.getCall(started.callId);
|
|
315
|
+
expect(call?.transcript.map((entry) => entry.text)).toEqual([
|
|
316
|
+
"How can I help?",
|
|
317
|
+
"Please check status",
|
|
318
|
+
]);
|
|
319
|
+
const metadata = (call?.metadata ?? {}) as Record<string, unknown>;
|
|
320
|
+
expect(typeof metadata.lastTurnLatencyMs).toBe("number");
|
|
321
|
+
expect(typeof metadata.lastTurnListenWaitMs).toBe("number");
|
|
322
|
+
expect(metadata.turnCount).toBe(1);
|
|
323
|
+
});
|
|
324
|
+
|
|
325
|
+
it("rejects overlapping continueCall requests for the same call", async () => {
|
|
326
|
+
const config = VoiceCallConfigSchema.parse({
|
|
327
|
+
enabled: true,
|
|
328
|
+
provider: "plivo",
|
|
329
|
+
fromNumber: "+15550000000",
|
|
330
|
+
transcriptTimeoutMs: 5000,
|
|
331
|
+
});
|
|
332
|
+
|
|
333
|
+
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
334
|
+
const provider = new FakeProvider();
|
|
335
|
+
const manager = new CallManager(config, storePath);
|
|
336
|
+
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
337
|
+
|
|
338
|
+
const started = await manager.initiateCall("+15550000004");
|
|
339
|
+
expect(started.success).toBe(true);
|
|
340
|
+
|
|
341
|
+
manager.processEvent({
|
|
342
|
+
id: "evt-overlap-answered",
|
|
343
|
+
type: "call.answered",
|
|
344
|
+
callId: started.callId,
|
|
345
|
+
providerCallId: "request-uuid",
|
|
346
|
+
timestamp: Date.now(),
|
|
347
|
+
});
|
|
348
|
+
|
|
349
|
+
const first = manager.continueCall(started.callId, "First prompt");
|
|
350
|
+
const second = await manager.continueCall(started.callId, "Second prompt");
|
|
351
|
+
expect(second.success).toBe(false);
|
|
352
|
+
expect(second.error).toBe("Already waiting for transcript");
|
|
353
|
+
|
|
354
|
+
manager.processEvent({
|
|
355
|
+
id: "evt-overlap-speech",
|
|
356
|
+
type: "call.speech",
|
|
357
|
+
callId: started.callId,
|
|
358
|
+
providerCallId: "request-uuid",
|
|
359
|
+
timestamp: Date.now(),
|
|
360
|
+
transcript: "Done",
|
|
361
|
+
isFinal: true,
|
|
362
|
+
});
|
|
363
|
+
|
|
364
|
+
const firstResult = await first;
|
|
365
|
+
expect(firstResult.success).toBe(true);
|
|
366
|
+
expect(firstResult.transcript).toBe("Done");
|
|
367
|
+
expect(provider.startListeningCalls).toHaveLength(1);
|
|
368
|
+
expect(provider.stopListeningCalls).toHaveLength(1);
|
|
369
|
+
});
|
|
370
|
+
|
|
371
|
+
it("tracks latency metadata across multiple closed-loop turns", async () => {
|
|
372
|
+
const config = VoiceCallConfigSchema.parse({
|
|
373
|
+
enabled: true,
|
|
374
|
+
provider: "plivo",
|
|
375
|
+
fromNumber: "+15550000000",
|
|
376
|
+
transcriptTimeoutMs: 5000,
|
|
377
|
+
});
|
|
378
|
+
|
|
379
|
+
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
380
|
+
const provider = new FakeProvider();
|
|
381
|
+
const manager = new CallManager(config, storePath);
|
|
382
|
+
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
383
|
+
|
|
384
|
+
const started = await manager.initiateCall("+15550000005");
|
|
385
|
+
expect(started.success).toBe(true);
|
|
386
|
+
|
|
387
|
+
manager.processEvent({
|
|
388
|
+
id: "evt-multi-answered",
|
|
389
|
+
type: "call.answered",
|
|
390
|
+
callId: started.callId,
|
|
391
|
+
providerCallId: "request-uuid",
|
|
392
|
+
timestamp: Date.now(),
|
|
393
|
+
});
|
|
394
|
+
|
|
395
|
+
const firstTurn = manager.continueCall(started.callId, "First question");
|
|
396
|
+
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
397
|
+
manager.processEvent({
|
|
398
|
+
id: "evt-multi-speech-1",
|
|
399
|
+
type: "call.speech",
|
|
400
|
+
callId: started.callId,
|
|
401
|
+
providerCallId: "request-uuid",
|
|
402
|
+
timestamp: Date.now(),
|
|
403
|
+
transcript: "First answer",
|
|
404
|
+
isFinal: true,
|
|
405
|
+
});
|
|
406
|
+
await firstTurn;
|
|
407
|
+
|
|
408
|
+
const secondTurn = manager.continueCall(started.callId, "Second question");
|
|
409
|
+
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
410
|
+
manager.processEvent({
|
|
411
|
+
id: "evt-multi-speech-2",
|
|
412
|
+
type: "call.speech",
|
|
413
|
+
callId: started.callId,
|
|
414
|
+
providerCallId: "request-uuid",
|
|
415
|
+
timestamp: Date.now(),
|
|
416
|
+
transcript: "Second answer",
|
|
417
|
+
isFinal: true,
|
|
418
|
+
});
|
|
419
|
+
const secondResult = await secondTurn;
|
|
420
|
+
|
|
421
|
+
expect(secondResult.success).toBe(true);
|
|
422
|
+
|
|
423
|
+
const call = manager.getCall(started.callId);
|
|
424
|
+
expect(call?.transcript.map((entry) => entry.text)).toEqual([
|
|
425
|
+
"First question",
|
|
426
|
+
"First answer",
|
|
427
|
+
"Second question",
|
|
428
|
+
"Second answer",
|
|
429
|
+
]);
|
|
430
|
+
const metadata = (call?.metadata ?? {}) as Record<string, unknown>;
|
|
431
|
+
expect(metadata.turnCount).toBe(2);
|
|
432
|
+
expect(typeof metadata.lastTurnLatencyMs).toBe("number");
|
|
433
|
+
expect(typeof metadata.lastTurnListenWaitMs).toBe("number");
|
|
434
|
+
expect(provider.startListeningCalls).toHaveLength(2);
|
|
435
|
+
expect(provider.stopListeningCalls).toHaveLength(2);
|
|
436
|
+
});
|
|
437
|
+
|
|
438
|
+
it("handles repeated closed-loop turns without waiter churn", async () => {
|
|
439
|
+
const config = VoiceCallConfigSchema.parse({
|
|
440
|
+
enabled: true,
|
|
441
|
+
provider: "plivo",
|
|
442
|
+
fromNumber: "+15550000000",
|
|
443
|
+
transcriptTimeoutMs: 5000,
|
|
444
|
+
});
|
|
445
|
+
|
|
446
|
+
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
447
|
+
const provider = new FakeProvider();
|
|
448
|
+
const manager = new CallManager(config, storePath);
|
|
449
|
+
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
450
|
+
|
|
451
|
+
const started = await manager.initiateCall("+15550000006");
|
|
452
|
+
expect(started.success).toBe(true);
|
|
453
|
+
|
|
454
|
+
manager.processEvent({
|
|
455
|
+
id: "evt-loop-answered",
|
|
456
|
+
type: "call.answered",
|
|
457
|
+
callId: started.callId,
|
|
458
|
+
providerCallId: "request-uuid",
|
|
459
|
+
timestamp: Date.now(),
|
|
460
|
+
});
|
|
461
|
+
|
|
462
|
+
for (let i = 1; i <= 5; i++) {
|
|
463
|
+
const turnPromise = manager.continueCall(started.callId, `Prompt ${i}`);
|
|
464
|
+
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
465
|
+
manager.processEvent({
|
|
466
|
+
id: `evt-loop-speech-${i}`,
|
|
467
|
+
type: "call.speech",
|
|
468
|
+
callId: started.callId,
|
|
469
|
+
providerCallId: "request-uuid",
|
|
470
|
+
timestamp: Date.now(),
|
|
471
|
+
transcript: `Answer ${i}`,
|
|
472
|
+
isFinal: true,
|
|
473
|
+
});
|
|
474
|
+
const result = await turnPromise;
|
|
475
|
+
expect(result.success).toBe(true);
|
|
476
|
+
expect(result.transcript).toBe(`Answer ${i}`);
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
const call = manager.getCall(started.callId);
|
|
480
|
+
const metadata = (call?.metadata ?? {}) as Record<string, unknown>;
|
|
481
|
+
expect(metadata.turnCount).toBe(5);
|
|
482
|
+
expect(provider.startListeningCalls).toHaveLength(5);
|
|
483
|
+
expect(provider.stopListeningCalls).toHaveLength(5);
|
|
484
|
+
});
|
|
264
485
|
});
|
package/src/manager.ts
CHANGED
|
@@ -3,8 +3,6 @@ import os from "node:os";
|
|
|
3
3
|
import path from "node:path";
|
|
4
4
|
import type { VoiceCallConfig } from "./config.js";
|
|
5
5
|
import type { CallManagerContext } from "./manager/context.js";
|
|
6
|
-
import type { VoiceCallProvider } from "./providers/base.js";
|
|
7
|
-
import type { CallId, CallRecord, NormalizedEvent, OutboundCallOptions } from "./types.js";
|
|
8
6
|
import { processEvent as processManagerEvent } from "./manager/events.js";
|
|
9
7
|
import { getCallByProviderCallId as getCallByProviderCallIdFromMaps } from "./manager/lookup.js";
|
|
10
8
|
import {
|
|
@@ -15,6 +13,8 @@ import {
|
|
|
15
13
|
speakInitialMessage as speakInitialMessageWithContext,
|
|
16
14
|
} from "./manager/outbound.js";
|
|
17
15
|
import { getCallHistoryFromStore, loadActiveCallsFromStore } from "./manager/store.js";
|
|
16
|
+
import type { VoiceCallProvider } from "./providers/base.js";
|
|
17
|
+
import type { CallId, CallRecord, NormalizedEvent, OutboundCallOptions } from "./types.js";
|
|
18
18
|
import { resolveUserPath } from "./utils.js";
|
|
19
19
|
|
|
20
20
|
function resolveDefaultStoreBase(config: VoiceCallConfig, storePath?: string): string {
|
|
@@ -47,6 +47,7 @@ export class CallManager {
|
|
|
47
47
|
private config: VoiceCallConfig;
|
|
48
48
|
private storePath: string;
|
|
49
49
|
private webhookUrl: string | null = null;
|
|
50
|
+
private activeTurnCalls = new Set<CallId>();
|
|
50
51
|
private transcriptWaiters = new Map<
|
|
51
52
|
CallId,
|
|
52
53
|
{
|
|
@@ -137,6 +138,7 @@ export class CallManager {
|
|
|
137
138
|
config: this.config,
|
|
138
139
|
storePath: this.storePath,
|
|
139
140
|
webhookUrl: this.webhookUrl,
|
|
141
|
+
activeTurnCalls: this.activeTurnCalls,
|
|
140
142
|
transcriptWaiters: this.transcriptWaiters,
|
|
141
143
|
maxDurationTimers: this.maxDurationTimers,
|
|
142
144
|
onCallAnswered: (call) => {
|
package/src/media-stream.test.ts
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
import { describe, expect, it } from "vitest";
|
|
2
|
+
import { MediaStreamHandler } from "./media-stream.js";
|
|
2
3
|
import type {
|
|
3
4
|
OpenAIRealtimeSTTProvider,
|
|
4
5
|
RealtimeSTTSession,
|
|
5
6
|
} from "./providers/stt-openai-realtime.js";
|
|
6
|
-
import { MediaStreamHandler } from "./media-stream.js";
|
|
7
7
|
|
|
8
8
|
const createStubSession = (): RealtimeSTTSession => ({
|
|
9
9
|
connect: async () => {},
|
package/src/providers/plivo.ts
CHANGED
|
@@ -12,9 +12,9 @@ import type {
|
|
|
12
12
|
WebhookContext,
|
|
13
13
|
WebhookVerificationResult,
|
|
14
14
|
} from "../types.js";
|
|
15
|
-
import type { VoiceCallProvider } from "./base.js";
|
|
16
15
|
import { escapeXml } from "../voice-mapping.js";
|
|
17
16
|
import { reconstructWebhookUrl, verifyPlivoWebhook } from "../webhook-security.js";
|
|
17
|
+
import type { VoiceCallProvider } from "./base.js";
|
|
18
18
|
|
|
19
19
|
export interface PlivoProviderOptions {
|
|
20
20
|
/** Override public URL origin for signature verification */
|
package/src/providers/telnyx.test.ts
CHANGED
|
@@ -22,6 +22,37 @@ function decodeBase64Url(input: string): Buffer {
|
|
|
22
22
|
return Buffer.from(padded, "base64");
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
+
function expectWebhookVerificationSucceeds(params: {
|
|
26
|
+
publicKey: string;
|
|
27
|
+
privateKey: crypto.KeyObject;
|
|
28
|
+
}) {
|
|
29
|
+
const provider = new TelnyxProvider(
|
|
30
|
+
{ apiKey: "KEY123", connectionId: "CONN456", publicKey: params.publicKey },
|
|
31
|
+
{ skipVerification: false },
|
|
32
|
+
);
|
|
33
|
+
|
|
34
|
+
const rawBody = JSON.stringify({
|
|
35
|
+
event_type: "call.initiated",
|
|
36
|
+
payload: { call_control_id: "x" },
|
|
37
|
+
});
|
|
38
|
+
const timestamp = String(Math.floor(Date.now() / 1000));
|
|
39
|
+
const signedPayload = `${timestamp}|${rawBody}`;
|
|
40
|
+
const signature = crypto
|
|
41
|
+
.sign(null, Buffer.from(signedPayload), params.privateKey)
|
|
42
|
+
.toString("base64");
|
|
43
|
+
|
|
44
|
+
const result = provider.verifyWebhook(
|
|
45
|
+
createCtx({
|
|
46
|
+
rawBody,
|
|
47
|
+
headers: {
|
|
48
|
+
"telnyx-signature-ed25519": signature,
|
|
49
|
+
"telnyx-timestamp": timestamp,
|
|
50
|
+
},
|
|
51
|
+
}),
|
|
52
|
+
);
|
|
53
|
+
expect(result.ok).toBe(true);
|
|
54
|
+
}
|
|
55
|
+
|
|
25
56
|
describe("TelnyxProvider.verifyWebhook", () => {
|
|
26
57
|
it("fails closed when public key is missing and skipVerification is false", () => {
|
|
27
58
|
const provider = new TelnyxProvider(
|
|
@@ -63,59 +94,13 @@ describe("TelnyxProvider.verifyWebhook", () => {
|
|
|
63
94
|
|
|
64
95
|
const rawPublicKey = decodeBase64Url(jwk.x as string);
|
|
65
96
|
const rawPublicKeyBase64 = rawPublicKey.toString("base64");
|
|
66
|
-
|
|
67
|
-
const provider = new TelnyxProvider(
|
|
68
|
-
{ apiKey: "KEY123", connectionId: "CONN456", publicKey: rawPublicKeyBase64 },
|
|
69
|
-
{ skipVerification: false },
|
|
70
|
-
);
|
|
71
|
-
|
|
72
|
-
const rawBody = JSON.stringify({
|
|
73
|
-
event_type: "call.initiated",
|
|
74
|
-
payload: { call_control_id: "x" },
|
|
75
|
-
});
|
|
76
|
-
const timestamp = String(Math.floor(Date.now() / 1000));
|
|
77
|
-
const signedPayload = `${timestamp}|${rawBody}`;
|
|
78
|
-
const signature = crypto.sign(null, Buffer.from(signedPayload), privateKey).toString("base64");
|
|
79
|
-
|
|
80
|
-
const result = provider.verifyWebhook(
|
|
81
|
-
createCtx({
|
|
82
|
-
rawBody,
|
|
83
|
-
headers: {
|
|
84
|
-
"telnyx-signature-ed25519": signature,
|
|
85
|
-
"telnyx-timestamp": timestamp,
|
|
86
|
-
},
|
|
87
|
-
}),
|
|
88
|
-
);
|
|
89
|
-
expect(result.ok).toBe(true);
|
|
97
|
+
expectWebhookVerificationSucceeds({ publicKey: rawPublicKeyBase64, privateKey });
|
|
90
98
|
});
|
|
91
99
|
|
|
92
100
|
it("verifies a valid signature with a DER SPKI public key (Base64)", () => {
|
|
93
101
|
const { publicKey, privateKey } = crypto.generateKeyPairSync("ed25519");
|
|
94
102
|
const spkiDer = publicKey.export({ format: "der", type: "spki" }) as Buffer;
|
|
95
103
|
const spkiDerBase64 = spkiDer.toString("base64");
|
|
96
|
-
|
|
97
|
-
const provider = new TelnyxProvider(
|
|
98
|
-
{ apiKey: "KEY123", connectionId: "CONN456", publicKey: spkiDerBase64 },
|
|
99
|
-
{ skipVerification: false },
|
|
100
|
-
);
|
|
101
|
-
|
|
102
|
-
const rawBody = JSON.stringify({
|
|
103
|
-
event_type: "call.initiated",
|
|
104
|
-
payload: { call_control_id: "x" },
|
|
105
|
-
});
|
|
106
|
-
const timestamp = String(Math.floor(Date.now() / 1000));
|
|
107
|
-
const signedPayload = `${timestamp}|${rawBody}`;
|
|
108
|
-
const signature = crypto.sign(null, Buffer.from(signedPayload), privateKey).toString("base64");
|
|
109
|
-
|
|
110
|
-
const result = provider.verifyWebhook(
|
|
111
|
-
createCtx({
|
|
112
|
-
rawBody,
|
|
113
|
-
headers: {
|
|
114
|
-
"telnyx-signature-ed25519": signature,
|
|
115
|
-
"telnyx-timestamp": timestamp,
|
|
116
|
-
},
|
|
117
|
-
}),
|
|
118
|
-
);
|
|
119
|
-
expect(result.ok).toBe(true);
|
|
104
|
+
expectWebhookVerificationSucceeds({ publicKey: spkiDerBase64, privateKey });
|
|
120
105
|
});
|
|
121
106
|
});
|
package/src/providers/telnyx.ts
CHANGED
|
@@ -13,8 +13,8 @@ import type {
|
|
|
13
13
|
WebhookContext,
|
|
14
14
|
WebhookVerificationResult,
|
|
15
15
|
} from "../types.js";
|
|
16
|
-
import type { VoiceCallProvider } from "./base.js";
|
|
17
16
|
import { verifyTelnyxWebhook } from "../webhook-security.js";
|
|
17
|
+
import type { VoiceCallProvider } from "./base.js";
|
|
18
18
|
|
|
19
19
|
/**
|
|
20
20
|
* Telnyx Voice API provider implementation.
|
package/src/providers/twilio/webhook.ts
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { WebhookContext, WebhookVerificationResult } from "../../types.js";
|
|
2
|
-
import type { TwilioProviderOptions } from "../twilio.js";
|
|
3
2
|
import { verifyTwilioWebhook } from "../../webhook-security.js";
|
|
3
|
+
import type { TwilioProviderOptions } from "../twilio.js";
|
|
4
4
|
|
|
5
5
|
export function verifyTwilioProviderWebhook(params: {
|
|
6
6
|
ctx: WebhookContext;
|
package/src/providers/twilio.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import crypto from "node:crypto";
|
|
2
2
|
import type { TwilioConfig, WebhookSecurityConfig } from "../config.js";
|
|
3
3
|
import type { MediaStreamHandler } from "../media-stream.js";
|
|
4
|
+
import { chunkAudio } from "../telephony-audio.js";
|
|
4
5
|
import type { TelephonyTtsProvider } from "../telephony-tts.js";
|
|
5
6
|
import type {
|
|
6
7
|
HangupCallInput,
|
|
@@ -14,9 +15,8 @@ import type {
|
|
|
14
15
|
WebhookContext,
|
|
15
16
|
WebhookVerificationResult,
|
|
16
17
|
} from "../types.js";
|
|
17
|
-
import type { VoiceCallProvider } from "./base.js";
|
|
18
|
-
import { chunkAudio } from "../telephony-audio.js";
|
|
19
18
|
import { escapeXml, mapVoiceToPolly } from "../voice-mapping.js";
|
|
19
|
+
import type { VoiceCallProvider } from "./base.js";
|
|
20
20
|
import { twilioApiRequest } from "./twilio/api.js";
|
|
21
21
|
import { verifyTwilioProviderWebhook } from "./twilio/webhook.js";
|
|
22
22
|
|
package/src/runtime.ts
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
import type { VoiceCallConfig } from "./config.js";
|
|
2
|
-
import type { CoreConfig } from "./core-bridge.js";
|
|
3
|
-
import type { VoiceCallProvider } from "./providers/base.js";
|
|
4
|
-
import type { TelephonyTtsRuntime } from "./telephony-tts.js";
|
|
5
2
|
import { resolveVoiceCallConfig, validateProviderConfig } from "./config.js";
|
|
3
|
+
import type { CoreConfig } from "./core-bridge.js";
|
|
6
4
|
import { CallManager } from "./manager.js";
|
|
5
|
+
import type { VoiceCallProvider } from "./providers/base.js";
|
|
7
6
|
import { MockProvider } from "./providers/mock.js";
|
|
8
7
|
import { PlivoProvider } from "./providers/plivo.js";
|
|
9
8
|
import { TelnyxProvider } from "./providers/telnyx.js";
|
|
10
9
|
import { TwilioProvider } from "./providers/twilio.js";
|
|
10
|
+
import type { TelephonyTtsRuntime } from "./telephony-tts.js";
|
|
11
11
|
import { createTelephonyTtsProvider } from "./telephony-tts.js";
|
|
12
12
|
import { startTunnel, type TunnelResult } from "./tunnel.js";
|
|
13
13
|
import {
|
package/src/webhook.test.ts
ADDED
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
|
|
2
|
+
import { VoiceCallConfigSchema, type VoiceCallConfig } from "./config.js";
|
|
3
|
+
import type { CallManager } from "./manager.js";
|
|
4
|
+
import type { VoiceCallProvider } from "./providers/base.js";
|
|
5
|
+
import type { CallRecord } from "./types.js";
|
|
6
|
+
import { VoiceCallWebhookServer } from "./webhook.js";
|
|
7
|
+
|
|
8
|
+
const provider: VoiceCallProvider = {
|
|
9
|
+
name: "mock",
|
|
10
|
+
verifyWebhook: () => ({ ok: true }),
|
|
11
|
+
parseWebhookEvent: () => ({ events: [] }),
|
|
12
|
+
initiateCall: async () => ({ providerCallId: "provider-call", status: "initiated" }),
|
|
13
|
+
hangupCall: async () => {},
|
|
14
|
+
playTts: async () => {},
|
|
15
|
+
startListening: async () => {},
|
|
16
|
+
stopListening: async () => {},
|
|
17
|
+
};
|
|
18
|
+
|
|
19
|
+
const createConfig = (overrides: Partial<VoiceCallConfig> = {}): VoiceCallConfig => {
|
|
20
|
+
const base = VoiceCallConfigSchema.parse({});
|
|
21
|
+
base.serve.port = 0;
|
|
22
|
+
|
|
23
|
+
return {
|
|
24
|
+
...base,
|
|
25
|
+
...overrides,
|
|
26
|
+
serve: {
|
|
27
|
+
...base.serve,
|
|
28
|
+
...(overrides.serve ?? {}),
|
|
29
|
+
},
|
|
30
|
+
};
|
|
31
|
+
};
|
|
32
|
+
|
|
33
|
+
const createCall = (startedAt: number): CallRecord => ({
|
|
34
|
+
callId: "call-1",
|
|
35
|
+
providerCallId: "provider-call-1",
|
|
36
|
+
provider: "mock",
|
|
37
|
+
direction: "outbound",
|
|
38
|
+
state: "initiated",
|
|
39
|
+
from: "+15550001234",
|
|
40
|
+
to: "+15550005678",
|
|
41
|
+
startedAt,
|
|
42
|
+
transcript: [],
|
|
43
|
+
processedEventIds: [],
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
const createManager = (calls: CallRecord[]) => {
|
|
47
|
+
const endCall = vi.fn(async () => ({ success: true }));
|
|
48
|
+
const manager = {
|
|
49
|
+
getActiveCalls: () => calls,
|
|
50
|
+
endCall,
|
|
51
|
+
} as unknown as CallManager;
|
|
52
|
+
|
|
53
|
+
return { manager, endCall };
|
|
54
|
+
};
|
|
55
|
+
|
|
56
|
+
describe("VoiceCallWebhookServer stale call reaper", () => {
|
|
57
|
+
beforeEach(() => {
|
|
58
|
+
vi.useFakeTimers();
|
|
59
|
+
});
|
|
60
|
+
|
|
61
|
+
afterEach(() => {
|
|
62
|
+
vi.useRealTimers();
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
it("ends calls older than staleCallReaperSeconds", async () => {
|
|
66
|
+
const now = new Date("2026-02-16T00:00:00Z");
|
|
67
|
+
vi.setSystemTime(now);
|
|
68
|
+
|
|
69
|
+
const call = createCall(now.getTime() - 120_000);
|
|
70
|
+
const { manager, endCall } = createManager([call]);
|
|
71
|
+
const config = createConfig({ staleCallReaperSeconds: 60 });
|
|
72
|
+
const server = new VoiceCallWebhookServer(config, manager, provider);
|
|
73
|
+
|
|
74
|
+
try {
|
|
75
|
+
await server.start();
|
|
76
|
+
await vi.advanceTimersByTimeAsync(30_000);
|
|
77
|
+
expect(endCall).toHaveBeenCalledWith(call.callId);
|
|
78
|
+
} finally {
|
|
79
|
+
await server.stop();
|
|
80
|
+
}
|
|
81
|
+
});
|
|
82
|
+
|
|
83
|
+
it("skips calls that are younger than the threshold", async () => {
|
|
84
|
+
const now = new Date("2026-02-16T00:00:00Z");
|
|
85
|
+
vi.setSystemTime(now);
|
|
86
|
+
|
|
87
|
+
const call = createCall(now.getTime() - 10_000);
|
|
88
|
+
const { manager, endCall } = createManager([call]);
|
|
89
|
+
const config = createConfig({ staleCallReaperSeconds: 60 });
|
|
90
|
+
const server = new VoiceCallWebhookServer(config, manager, provider);
|
|
91
|
+
|
|
92
|
+
try {
|
|
93
|
+
await server.start();
|
|
94
|
+
await vi.advanceTimersByTimeAsync(30_000);
|
|
95
|
+
expect(endCall).not.toHaveBeenCalled();
|
|
96
|
+
} finally {
|
|
97
|
+
await server.stop();
|
|
98
|
+
}
|
|
99
|
+
});
|
|
100
|
+
|
|
101
|
+
it("does not run when staleCallReaperSeconds is disabled", async () => {
|
|
102
|
+
const now = new Date("2026-02-16T00:00:00Z");
|
|
103
|
+
vi.setSystemTime(now);
|
|
104
|
+
|
|
105
|
+
const call = createCall(now.getTime() - 120_000);
|
|
106
|
+
const { manager, endCall } = createManager([call]);
|
|
107
|
+
const config = createConfig({ staleCallReaperSeconds: 0 });
|
|
108
|
+
const server = new VoiceCallWebhookServer(config, manager, provider);
|
|
109
|
+
|
|
110
|
+
try {
|
|
111
|
+
await server.start();
|
|
112
|
+
await vi.advanceTimersByTimeAsync(60_000);
|
|
113
|
+
expect(endCall).not.toHaveBeenCalled();
|
|
114
|
+
} finally {
|
|
115
|
+
await server.stop();
|
|
116
|
+
}
|
|
117
|
+
});
|
|
118
|
+
});
|
package/src/webhook.ts
CHANGED
|
@@ -10,11 +10,11 @@ import type { VoiceCallConfig } from "./config.js";
|
|
|
10
10
|
import type { CoreConfig } from "./core-bridge.js";
|
|
11
11
|
import type { CallManager } from "./manager.js";
|
|
12
12
|
import type { MediaStreamConfig } from "./media-stream.js";
|
|
13
|
+
import { MediaStreamHandler } from "./media-stream.js";
|
|
13
14
|
import type { VoiceCallProvider } from "./providers/base.js";
|
|
15
|
+
import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js";
|
|
14
16
|
import type { TwilioProvider } from "./providers/twilio.js";
|
|
15
17
|
import type { NormalizedEvent, WebhookContext } from "./types.js";
|
|
16
|
-
import { MediaStreamHandler } from "./media-stream.js";
|
|
17
|
-
import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js";
|
|
18
18
|
|
|
19
19
|
const MAX_WEBHOOK_BODY_BYTES = 1024 * 1024;
|
|
20
20
|
|
|
@@ -28,6 +28,7 @@ export class VoiceCallWebhookServer {
|
|
|
28
28
|
private manager: CallManager;
|
|
29
29
|
private provider: VoiceCallProvider;
|
|
30
30
|
private coreConfig: CoreConfig | null;
|
|
31
|
+
private staleCallReaperInterval: ReturnType<typeof setInterval> | null = null;
|
|
31
32
|
|
|
32
33
|
/** Media stream handler for bidirectional audio (when streaming enabled) */
|
|
33
34
|
private mediaStreamHandler: MediaStreamHandler | null = null;
|
|
@@ -151,6 +152,17 @@ export class VoiceCallWebhookServer {
|
|
|
151
152
|
},
|
|
152
153
|
onDisconnect: (callId) => {
|
|
153
154
|
console.log(`[voice-call] Media stream disconnected: ${callId}`);
|
|
155
|
+
// Auto-end call when media stream disconnects to prevent stuck calls.
|
|
156
|
+
// Without this, calls can remain active indefinitely after the stream closes.
|
|
157
|
+
const disconnectedCall = this.manager.getCallByProviderCallId(callId);
|
|
158
|
+
if (disconnectedCall) {
|
|
159
|
+
console.log(
|
|
160
|
+
`[voice-call] Auto-ending call ${disconnectedCall.callId} on stream disconnect`,
|
|
161
|
+
);
|
|
162
|
+
void this.manager.endCall(disconnectedCall.callId).catch((err) => {
|
|
163
|
+
console.warn(`[voice-call] Failed to auto-end call ${disconnectedCall.callId}:`, err);
|
|
164
|
+
});
|
|
165
|
+
}
|
|
154
166
|
if (this.provider.name === "twilio") {
|
|
155
167
|
(this.provider as TwilioProvider).unregisterCallStream(callId);
|
|
156
168
|
}
|
|
@@ -200,14 +212,51 @@ export class VoiceCallWebhookServer {
|
|
|
200
212
|
console.log(`[voice-call] Media stream WebSocket on ws://${bind}:${port}${streamPath}`);
|
|
201
213
|
}
|
|
202
214
|
resolve(url);
|
|
215
|
+
|
|
216
|
+
// Start the stale call reaper if configured
|
|
217
|
+
this.startStaleCallReaper();
|
|
203
218
|
});
|
|
204
219
|
});
|
|
205
220
|
}
|
|
206
221
|
|
|
222
|
+
/**
|
|
223
|
+
* Start a periodic reaper that ends calls older than the configured threshold.
|
|
224
|
+
* Catches calls stuck in unexpected states (e.g., notify-mode calls that never
|
|
225
|
+
* receive a terminal webhook from the provider).
|
|
226
|
+
*/
|
|
227
|
+
private startStaleCallReaper(): void {
|
|
228
|
+
const maxAgeSeconds = this.config.staleCallReaperSeconds;
|
|
229
|
+
if (!maxAgeSeconds || maxAgeSeconds <= 0) {
|
|
230
|
+
return;
|
|
231
|
+
}
|
|
232
|
+
|
|
233
|
+
const CHECK_INTERVAL_MS = 30_000; // Check every 30 seconds
|
|
234
|
+
const maxAgeMs = maxAgeSeconds * 1000;
|
|
235
|
+
|
|
236
|
+
this.staleCallReaperInterval = setInterval(() => {
|
|
237
|
+
const now = Date.now();
|
|
238
|
+
for (const call of this.manager.getActiveCalls()) {
|
|
239
|
+
const age = now - call.startedAt;
|
|
240
|
+
if (age > maxAgeMs) {
|
|
241
|
+
console.log(
|
|
242
|
+
`[voice-call] Reaping stale call ${call.callId} (age: ${Math.round(age / 1000)}s, state: ${call.state})`,
|
|
243
|
+
);
|
|
244
|
+
void this.manager.endCall(call.callId).catch((err) => {
|
|
245
|
+
console.warn(`[voice-call] Reaper failed to end call ${call.callId}:`, err);
|
|
246
|
+
});
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
}, CHECK_INTERVAL_MS);
|
|
250
|
+
}
|
|
251
|
+
|
|
207
252
|
/**
|
|
208
253
|
* Stop the webhook server.
|
|
209
254
|
*/
|
|
210
255
|
async stop(): Promise<void> {
|
|
256
|
+
if (this.staleCallReaperInterval) {
|
|
257
|
+
clearInterval(this.staleCallReaperInterval);
|
|
258
|
+
this.staleCallReaperInterval = null;
|
|
259
|
+
}
|
|
211
260
|
return new Promise((resolve) => {
|
|
212
261
|
if (this.server) {
|
|
213
262
|
this.server.close(() => {
|