@openclaw/voice-call 2026.2.14 → 2026.2.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
1
1
  # Changelog
2
2
 
3
+ ## 2026.2.17
4
+
5
+ ### Changes
6
+
7
+ - Version alignment with core OpenClaw release numbers.
8
+
9
+ ## 2026.2.16
10
+
11
+ ### Changes
12
+
13
+ - Version alignment with core OpenClaw release numbers.
14
+
15
+ ## 2026.2.15
16
+
17
+ ### Changes
18
+
19
+ - Version alignment with core OpenClaw release numbers.
20
+
3
21
  ## 2026.2.14
4
22
 
5
23
  ### Changes
package/README.md CHANGED
@@ -87,6 +87,26 @@ Notes:
87
87
  - Telnyx requires `telnyx.publicKey` (or `TELNYX_PUBLIC_KEY`) unless `skipSignatureVerification` is true.
88
88
  - `tunnel.allowNgrokFreeTierLoopbackBypass: true` allows Twilio webhooks with invalid signatures **only** when `tunnel.provider="ngrok"` and `serve.bind` is loopback (ngrok local agent). Use for local dev only.
89
89
 
90
+ ## Stale call reaper
91
+
92
+ Use `staleCallReaperSeconds` to end calls that never receive a terminal webhook
93
+ (for example, notify-mode calls that never complete). The default is `0`
94
+ (disabled).
95
+
96
+ Recommended ranges:
97
+
98
+ - **Production:** `120`–`300` seconds for notify-style flows.
99
+ - Keep this value **higher than `maxDurationSeconds`** so normal calls can
100
+ finish. A good starting point is `maxDurationSeconds` plus 30–60 seconds (for example, `360` with the default `maxDurationSeconds` of `300`).
101
+
102
+ Example:
103
+
104
+ ```json5
105
+ {
106
+ staleCallReaperSeconds: 360,
107
+ }
108
+ ```
109
+
90
110
  ## TTS for calls
91
111
 
92
112
  Voice Call uses the core `messages.tts` configuration (OpenAI or ElevenLabs) for
package/index.ts CHANGED
@@ -1,6 +1,5 @@
1
- import type { GatewayRequestHandlerOptions, OpenClawPluginApi } from "openclaw/plugin-sdk";
2
1
  import { Type } from "@sinclair/typebox";
3
- import type { CoreConfig } from "./src/core-bridge.js";
2
+ import type { GatewayRequestHandlerOptions, OpenClawPluginApi } from "openclaw/plugin-sdk";
4
3
  import { registerVoiceCallCli } from "./src/cli.js";
5
4
  import {
6
5
  VoiceCallConfigSchema,
@@ -8,6 +7,7 @@ import {
8
7
  validateProviderConfig,
9
8
  type VoiceCallConfig,
10
9
  } from "./src/config.js";
10
+ import type { CoreConfig } from "./src/core-bridge.js";
11
11
  import { createVoiceCallRuntime, type VoiceCallRuntime } from "./src/runtime.js";
12
12
 
13
13
  const voiceCallConfigSchema = {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openclaw/voice-call",
3
- "version": "2026.2.14",
3
+ "version": "2026.2.17",
4
4
  "description": "OpenClaw voice-call plugin",
5
5
  "type": "module",
6
6
  "dependencies": {
package/src/cli.ts CHANGED
@@ -1,7 +1,7 @@
1
- import type { Command } from "commander";
2
1
  import fs from "node:fs";
3
2
  import os from "node:os";
4
3
  import path from "node:path";
4
+ import type { Command } from "commander";
5
5
  import { sleep } from "openclaw/plugin-sdk";
6
6
  import type { VoiceCallConfig } from "./config.js";
7
7
  import type { VoiceCallRuntime } from "./runtime.js";
@@ -41,6 +41,46 @@ function resolveDefaultStorePath(config: VoiceCallConfig): string {
41
41
  return path.join(base, "calls.jsonl");
42
42
  }
43
43
 
44
/**
 * Returns the nearest-rank percentile of a series of samples.
 *
 * The input is copied before sorting, so the caller's array is never mutated;
 * for that reason read-only (or frozen) arrays are accepted as well.
 *
 * @param values - Samples in any order.
 * @param p - Percentile on a 0–100 scale. Values at or below 0 select the smallest
 *   sample and values at or above 100 select the largest.
 * @returns The selected sample, or `0` when `values` is empty (or when no index
 *   can be derived, e.g. `p` is `NaN`).
 */
function percentile(values: readonly number[], p: number): number {
  if (values.length === 0) {
    return 0;
  }
  const sorted = [...values].sort((a, b) => a - b);
  // Nearest-rank: the 1-based rank is ceil(p/100 * n); clamp it into [1, n]
  // and shift to a 0-based index.
  const rank = Math.ceil((p / 100) * sorted.length);
  const idx = Math.min(sorted.length - 1, Math.max(0, rank - 1));
  return sorted[idx] ?? 0;
}
52
+
53
/**
 * Condenses a series of millisecond samples into count, min, max, mean and the
 * 50th/95th percentiles.
 *
 * @param values - Samples to summarize.
 * @returns The summary; every field is `0` when `values` is empty.
 */
function summarizeSeries(values: number[]): {
  count: number;
  minMs: number;
  maxMs: number;
  avgMs: number;
  p50Ms: number;
  p95Ms: number;
} {
  const count = values.length;
  if (count === 0) {
    return { count: 0, minMs: 0, maxMs: 0, avgMs: 0, p50Ms: 0, p95Ms: 0 };
  }

  // Single pass for the extremes and the running total. Iterating (rather than
  // spreading into Math.min/Math.max) keeps this safe for very long series.
  let lowest = Number.POSITIVE_INFINITY;
  let highest = Number.NEGATIVE_INFINITY;
  let total = 0;
  values.forEach((sample) => {
    if (sample < lowest) {
      lowest = sample;
    }
    if (sample > highest) {
      highest = sample;
    }
    total += sample;
  });

  return {
    count,
    minMs: lowest,
    maxMs: highest,
    avgMs: total / count,
    p50Ms: percentile(values, 50),
    p95Ms: percentile(values, 95),
  };
}
83
+
44
84
  export function registerVoiceCallCli(params: {
45
85
  program: Command;
46
86
  config: VoiceCallConfig;
@@ -216,6 +256,57 @@ export function registerVoiceCallCli(params: {
216
256
  }
217
257
  });
218
258
 
259
// `latency` subcommand: reads the voice-call JSONL log and prints a JSON summary
// (count/min/max/avg/p50/p95) of the per-turn latency fields stored under each
// record's `metadata`.
root
  .command("latency")
  .description("Summarize turn latency metrics from voice-call JSONL logs")
  .option("--file <path>", "Path to calls.jsonl", resolveDefaultStorePath(config))
  .option("--last <n>", "Analyze last N records", "200")
  .action(async (options: { file: string; last?: string }) => {
    const file = options.file;
    // A non-numeric `--last` parses to NaN; Math.max(1, NaN) is NaN and
    // slice(-NaN) would silently return every record. Fall back to the
    // documented default of 200 in that case.
    const requestedLast = Number(options.last ?? 200);
    const last = Number.isNaN(requestedLast) ? 200 : Math.max(1, requestedLast);

    if (!fs.existsSync(file)) {
      throw new Error("No log file at " + file);
    }

    const content = fs.readFileSync(file, "utf8");
    // Drop empty lines, then keep only the trailing `last` records.
    const lines = content.split("\n").filter(Boolean).slice(-last);

    const turnLatencyMs: number[] = [];
    const listenWaitMs: number[] = [];

    for (const line of lines) {
      try {
        const parsed = JSON.parse(line) as {
          metadata?: { lastTurnLatencyMs?: unknown; lastTurnListenWaitMs?: unknown };
        };
        const latency = parsed.metadata?.lastTurnLatencyMs;
        const listenWait = parsed.metadata?.lastTurnListenWaitMs;
        // Only finite numbers are counted; records without these fields are skipped.
        if (typeof latency === "number" && Number.isFinite(latency)) {
          turnLatencyMs.push(latency);
        }
        if (typeof listenWait === "number" && Number.isFinite(listenWait)) {
          listenWaitMs.push(listenWait);
        }
      } catch {
        // ignore malformed JSON lines
      }
    }

    // eslint-disable-next-line no-console
    console.log(
      JSON.stringify(
        {
          recordsScanned: lines.length,
          turnLatency: summarizeSeries(turnLatencyMs),
          listenWait: summarizeSeries(listenWaitMs),
        },
        null,
        2,
      ),
    );
  });
309
+
219
310
  root
220
311
  .command("expose")
221
312
  .description("Enable/disable Tailscale serve/funnel for the webhook")
@@ -10,6 +10,7 @@ function createBaseConfig(provider: "telnyx" | "twilio" | "plivo" | "mock"): Voi
10
10
  allowFrom: [],
11
11
  outbound: { defaultMode: "notify", notifyHangupDelaySec: 3 },
12
12
  maxDurationSeconds: 300,
13
+ staleCallReaperSeconds: 600,
13
14
  silenceTimeoutMs: 800,
14
15
  transcriptTimeoutMs: 180000,
15
16
  ringTimeoutMs: 30000,
@@ -32,7 +33,10 @@ function createBaseConfig(provider: "telnyx" | "twilio" | "plivo" | "mock"): Voi
32
33
  },
33
34
  skipSignatureVerification: false,
34
35
  stt: { provider: "openai", model: "whisper-1" },
35
- tts: { provider: "openai", model: "gpt-4o-mini-tts", voice: "coral" },
36
+ tts: {
37
+ provider: "openai",
38
+ openai: { model: "gpt-4o-mini-tts", voice: "coral" },
39
+ },
36
40
  responseModel: "openai/gpt-4o-mini",
37
41
  responseTimeoutMs: 30000,
38
42
  };
package/src/config.ts CHANGED
@@ -1,3 +1,9 @@
1
+ import {
2
+ TtsAutoSchema,
3
+ TtsConfigSchema,
4
+ TtsModeSchema,
5
+ TtsProviderSchema,
6
+ } from "openclaw/plugin-sdk";
1
7
  import { z } from "zod";
2
8
 
3
9
  // -----------------------------------------------------------------------------
@@ -77,81 +83,7 @@ export const SttConfigSchema = z
77
83
  .default({ provider: "openai", model: "whisper-1" });
78
84
  export type SttConfig = z.infer<typeof SttConfigSchema>;
79
85
 
80
- export const TtsProviderSchema = z.enum(["openai", "elevenlabs", "edge"]);
81
- export const TtsModeSchema = z.enum(["final", "all"]);
82
- export const TtsAutoSchema = z.enum(["off", "always", "inbound", "tagged"]);
83
-
84
- export const TtsConfigSchema = z
85
- .object({
86
- auto: TtsAutoSchema.optional(),
87
- enabled: z.boolean().optional(),
88
- mode: TtsModeSchema.optional(),
89
- provider: TtsProviderSchema.optional(),
90
- summaryModel: z.string().optional(),
91
- modelOverrides: z
92
- .object({
93
- enabled: z.boolean().optional(),
94
- allowText: z.boolean().optional(),
95
- allowProvider: z.boolean().optional(),
96
- allowVoice: z.boolean().optional(),
97
- allowModelId: z.boolean().optional(),
98
- allowVoiceSettings: z.boolean().optional(),
99
- allowNormalization: z.boolean().optional(),
100
- allowSeed: z.boolean().optional(),
101
- })
102
- .strict()
103
- .optional(),
104
- elevenlabs: z
105
- .object({
106
- apiKey: z.string().optional(),
107
- baseUrl: z.string().optional(),
108
- voiceId: z.string().optional(),
109
- modelId: z.string().optional(),
110
- seed: z.number().int().min(0).max(4294967295).optional(),
111
- applyTextNormalization: z.enum(["auto", "on", "off"]).optional(),
112
- languageCode: z.string().optional(),
113
- voiceSettings: z
114
- .object({
115
- stability: z.number().min(0).max(1).optional(),
116
- similarityBoost: z.number().min(0).max(1).optional(),
117
- style: z.number().min(0).max(1).optional(),
118
- useSpeakerBoost: z.boolean().optional(),
119
- speed: z.number().min(0.5).max(2).optional(),
120
- })
121
- .strict()
122
- .optional(),
123
- })
124
- .strict()
125
- .optional(),
126
- openai: z
127
- .object({
128
- apiKey: z.string().optional(),
129
- model: z.string().optional(),
130
- voice: z.string().optional(),
131
- })
132
- .strict()
133
- .optional(),
134
- edge: z
135
- .object({
136
- enabled: z.boolean().optional(),
137
- voice: z.string().optional(),
138
- lang: z.string().optional(),
139
- outputFormat: z.string().optional(),
140
- pitch: z.string().optional(),
141
- rate: z.string().optional(),
142
- volume: z.string().optional(),
143
- saveSubtitles: z.boolean().optional(),
144
- proxy: z.string().optional(),
145
- timeoutMs: z.number().int().min(1000).max(120000).optional(),
146
- })
147
- .strict()
148
- .optional(),
149
- prefsPath: z.string().optional(),
150
- maxTextLength: z.number().int().min(1).optional(),
151
- timeoutMs: z.number().int().min(1000).max(120000).optional(),
152
- })
153
- .strict()
154
- .optional();
86
+ export { TtsAutoSchema, TtsConfigSchema, TtsModeSchema, TtsProviderSchema };
155
87
  export type VoiceCallTtsConfig = z.infer<typeof TtsConfigSchema>;
156
88
 
157
89
  // -----------------------------------------------------------------------------
@@ -341,6 +273,14 @@ export const VoiceCallConfigSchema = z
341
273
  /** Maximum call duration in seconds */
342
274
  maxDurationSeconds: z.number().int().positive().default(300),
343
275
 
276
+ /**
277
+ * Maximum age of a call in seconds before it is automatically reaped.
278
+ * Catches calls stuck in unexpected states (e.g., notify-mode calls that
279
+ * never receive a terminal webhook). Set to 0 to disable.
280
+ * Default: 0 (disabled). Recommended: 120-300 for production.
281
+ */
282
+ staleCallReaperSeconds: z.number().int().nonnegative().default(0),
283
+
344
284
  /** Silence timeout for end-of-speech detection (ms) */
345
285
  silenceTimeoutMs: z.number().int().positive().default(800),
346
286
 
@@ -24,6 +24,7 @@ export type CallManagerRuntimeDeps = {
24
24
  };
25
25
 
26
26
  export type CallManagerTransientState = {
27
+ activeTurnCalls: Set<CallId>;
27
28
  transcriptWaiters: Map<CallId, TranscriptWaiter>;
28
29
  maxDurationTimers: Map<CallId, NodeJS.Timeout>;
29
30
  };
@@ -2,9 +2,10 @@ import fs from "node:fs";
2
2
  import os from "node:os";
3
3
  import path from "node:path";
4
4
  import { describe, expect, it } from "vitest";
5
+ import { VoiceCallConfigSchema } from "../config.js";
6
+ import type { VoiceCallProvider } from "../providers/base.js";
5
7
  import type { HangupCallInput, NormalizedEvent } from "../types.js";
6
8
  import type { CallManagerContext } from "./context.js";
7
- import { VoiceCallConfigSchema } from "../config.js";
8
9
  import { processEvent } from "./events.js";
9
10
 
10
11
  function createContext(overrides: Partial<CallManagerContext> = {}): CallManagerContext {
@@ -23,21 +24,35 @@ function createContext(overrides: Partial<CallManagerContext> = {}): CallManager
23
24
  }),
24
25
  storePath,
25
26
  webhookUrl: null,
27
+ activeTurnCalls: new Set(),
26
28
  transcriptWaiters: new Map(),
27
29
  maxDurationTimers: new Map(),
28
30
  ...overrides,
29
31
  };
30
32
  }
31
33
 
34
/**
 * Builds a stub `VoiceCallProvider` for tests: every method is a no-op (or
 * returns a fixed success value) unless replaced through `overrides`.
 */
function createProvider(overrides: Partial<VoiceCallProvider> = {}): VoiceCallProvider {
  const stub: VoiceCallProvider = {
    name: "plivo",
    verifyWebhook: () => ({ ok: true }),
    parseWebhookEvent: () => ({ events: [] }),
    initiateCall: async () => ({ providerCallId: "provider-call-id", status: "initiated" }),
    hangupCall: async () => {},
    playTts: async () => {},
    startListening: async () => {},
    stopListening: async () => {},
  };
  // Later spread wins, so any member supplied by the caller replaces the stub's.
  return { ...stub, ...overrides };
}
47
+
32
48
  describe("processEvent (functional)", () => {
33
49
  it("calls provider hangup when rejecting inbound call", () => {
34
50
  const hangupCalls: HangupCallInput[] = [];
35
- const provider = {
36
- name: "plivo" as const,
37
- async hangupCall(input: HangupCallInput): Promise<void> {
51
+ const provider = createProvider({
52
+ hangupCall: async (input: HangupCallInput): Promise<void> => {
38
53
  hangupCalls.push(input);
39
54
  },
40
- };
55
+ });
41
56
 
42
57
  const ctx = createContext({
43
58
  config: VoiceCallConfigSchema.parse({
@@ -98,12 +113,11 @@ describe("processEvent (functional)", () => {
98
113
 
99
114
  it("calls hangup only once for duplicate events for same rejected call", () => {
100
115
  const hangupCalls: HangupCallInput[] = [];
101
- const provider = {
102
- name: "plivo" as const,
103
- async hangupCall(input: HangupCallInput): Promise<void> {
116
+ const provider = createProvider({
117
+ hangupCall: async (input: HangupCallInput): Promise<void> => {
104
118
  hangupCalls.push(input);
105
119
  },
106
- };
120
+ });
107
121
  const ctx = createContext({
108
122
  config: VoiceCallConfigSchema.parse({
109
123
  enabled: true,
@@ -208,12 +222,11 @@ describe("processEvent (functional)", () => {
208
222
  });
209
223
 
210
224
  it("when hangup throws, logs and does not throw", () => {
211
- const provider = {
212
- name: "plivo" as const,
213
- async hangupCall(): Promise<void> {
225
+ const provider = createProvider({
226
+ hangupCall: async (): Promise<void> => {
214
227
  throw new Error("provider down");
215
228
  },
216
- };
229
+ });
217
230
  const ctx = createContext({
218
231
  config: VoiceCallConfigSchema.parse({
219
232
  enabled: true,
@@ -1,7 +1,7 @@
1
1
  import crypto from "node:crypto";
2
+ import { isAllowlistedCaller, normalizePhoneNumber } from "../allowlist.js";
2
3
  import type { CallRecord, CallState, NormalizedEvent } from "../types.js";
3
4
  import type { CallManagerContext } from "./context.js";
4
- import { isAllowlistedCaller, normalizePhoneNumber } from "../allowlist.js";
5
5
  import { findCall } from "./lookup.js";
6
6
  import { endCall } from "./outbound.js";
7
7
  import { addTranscriptEntry, transitionState } from "./state.js";
@@ -1,6 +1,5 @@
1
1
  import crypto from "node:crypto";
2
2
  import type { CallMode } from "../config.js";
3
- import type { CallManagerContext } from "./context.js";
4
3
  import {
5
4
  TerminalStates,
6
5
  type CallId,
@@ -8,6 +7,7 @@ import {
8
7
  type OutboundCallOptions,
9
8
  } from "../types.js";
10
9
  import { mapVoiceToPolly } from "../voice-mapping.js";
10
+ import type { CallManagerContext } from "./context.js";
11
11
  import { getCallByProviderCallId } from "./lookup.js";
12
12
  import { addTranscriptEntry, transitionState } from "./state.js";
13
13
  import { persistCallRecord } from "./store.js";
@@ -36,6 +36,7 @@ type ConversationContext = Pick<
36
36
  | "provider"
37
37
  | "config"
38
38
  | "storePath"
39
+ | "activeTurnCalls"
39
40
  | "transcriptWaiters"
40
41
  | "maxDurationTimers"
41
42
  >;
@@ -158,7 +159,6 @@ export async function speak(
158
159
  if (TerminalStates.has(call.state)) {
159
160
  return { success: false, error: "Call has ended" };
160
161
  }
161
-
162
162
  try {
163
163
  transitionState(call, "speaking");
164
164
  persistCallRecord(ctx.storePath, call);
@@ -242,6 +242,12 @@ export async function continueCall(
242
242
  if (TerminalStates.has(call.state)) {
243
243
  return { success: false, error: "Call has ended" };
244
244
  }
245
+ if (ctx.activeTurnCalls.has(callId) || ctx.transcriptWaiters.has(callId)) {
246
+ return { success: false, error: "Already waiting for transcript" };
247
+ }
248
+ ctx.activeTurnCalls.add(callId);
249
+
250
+ const turnStartedAt = Date.now();
245
251
 
246
252
  try {
247
253
  await speak(ctx, callId, prompt);
@@ -249,17 +255,45 @@ export async function continueCall(
249
255
  transitionState(call, "listening");
250
256
  persistCallRecord(ctx.storePath, call);
251
257
 
258
+ const listenStartedAt = Date.now();
252
259
  await ctx.provider.startListening({ callId, providerCallId: call.providerCallId });
253
260
 
254
261
  const transcript = await waitForFinalTranscript(ctx, callId);
262
+ const transcriptReceivedAt = Date.now();
255
263
 
256
264
  // Best-effort: stop listening after final transcript.
257
265
  await ctx.provider.stopListening({ callId, providerCallId: call.providerCallId });
258
266
 
267
+ const lastTurnLatencyMs = transcriptReceivedAt - turnStartedAt;
268
+ const lastTurnListenWaitMs = transcriptReceivedAt - listenStartedAt;
269
+ const turnCount =
270
+ call.metadata && typeof call.metadata.turnCount === "number"
271
+ ? call.metadata.turnCount + 1
272
+ : 1;
273
+
274
+ call.metadata = {
275
+ ...(call.metadata ?? {}),
276
+ turnCount,
277
+ lastTurnLatencyMs,
278
+ lastTurnListenWaitMs,
279
+ lastTurnCompletedAt: transcriptReceivedAt,
280
+ };
281
+ persistCallRecord(ctx.storePath, call);
282
+
283
+ console.log(
284
+ "[voice-call] continueCall latency call=" +
285
+ call.callId +
286
+ " totalMs=" +
287
+ String(lastTurnLatencyMs) +
288
+ " listenWaitMs=" +
289
+ String(lastTurnListenWaitMs),
290
+ );
291
+
259
292
  return { success: true, transcript };
260
293
  } catch (err) {
261
294
  return { success: false, error: err instanceof Error ? err.message : String(err) };
262
295
  } finally {
296
+ ctx.activeTurnCalls.delete(callId);
263
297
  clearTranscriptWaiter(ctx, callId);
264
298
  }
265
299
  }
@@ -1,5 +1,5 @@
1
- import type { CallManagerContext } from "./context.js";
2
1
  import { TerminalStates, type CallId } from "../types.js";
2
+ import type { CallManagerContext } from "./context.js";
3
3
  import { persistCallRecord } from "./store.js";
4
4
 
5
5
  type TimerContext = Pick<
@@ -87,8 +87,9 @@ export function resolveTranscriptWaiter(
87
87
  }
88
88
 
89
89
  export function waitForFinalTranscript(ctx: TimerContext, callId: CallId): Promise<string> {
90
- // Only allow one in-flight waiter per call.
91
- rejectTranscriptWaiter(ctx, callId, "Transcript waiter replaced");
90
+ if (ctx.transcriptWaiters.has(callId)) {
91
+ return Promise.reject(new Error("Already waiting for transcript"));
92
+ }
92
93
 
93
94
  const timeoutMs = ctx.config.transcriptTimeoutMs;
94
95
  return new Promise((resolve, reject) => {
@@ -1,6 +1,8 @@
1
1
  import os from "node:os";
2
2
  import path from "node:path";
3
3
  import { describe, expect, it } from "vitest";
4
+ import { VoiceCallConfigSchema } from "./config.js";
5
+ import { CallManager } from "./manager.js";
4
6
  import type { VoiceCallProvider } from "./providers/base.js";
5
7
  import type {
6
8
  HangupCallInput,
@@ -13,13 +15,13 @@ import type {
13
15
  WebhookContext,
14
16
  WebhookVerificationResult,
15
17
  } from "./types.js";
16
- import { VoiceCallConfigSchema } from "./config.js";
17
- import { CallManager } from "./manager.js";
18
18
 
19
19
  class FakeProvider implements VoiceCallProvider {
20
20
  readonly name = "plivo" as const;
21
21
  readonly playTtsCalls: PlayTtsInput[] = [];
22
22
  readonly hangupCalls: HangupCallInput[] = [];
23
+ readonly startListeningCalls: StartListeningInput[] = [];
24
+ readonly stopListeningCalls: StopListeningInput[] = [];
23
25
 
24
26
  verifyWebhook(_ctx: WebhookContext): WebhookVerificationResult {
25
27
  return { ok: true };
@@ -36,8 +38,12 @@ class FakeProvider implements VoiceCallProvider {
36
38
  async playTts(input: PlayTtsInput): Promise<void> {
37
39
  this.playTtsCalls.push(input);
38
40
  }
39
- async startListening(_input: StartListeningInput): Promise<void> {}
40
- async stopListening(_input: StopListeningInput): Promise<void> {}
41
+ async startListening(input: StartListeningInput): Promise<void> {
42
+ this.startListeningCalls.push(input);
43
+ }
44
+ async stopListening(input: StopListeningInput): Promise<void> {
45
+ this.stopListeningCalls.push(input);
46
+ }
41
47
  }
42
48
 
43
49
  describe("CallManager", () => {
@@ -261,4 +267,219 @@ describe("CallManager", () => {
261
267
 
262
268
  expect(manager.getCallByProviderCallId("provider-exact")).toBeDefined();
263
269
  });
270
+
271
+ it("completes a closed-loop turn without live audio", async () => {
272
+ const config = VoiceCallConfigSchema.parse({
273
+ enabled: true,
274
+ provider: "plivo",
275
+ fromNumber: "+15550000000",
276
+ transcriptTimeoutMs: 5000,
277
+ });
278
+
279
+ const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
280
+ const provider = new FakeProvider();
281
+ const manager = new CallManager(config, storePath);
282
+ manager.initialize(provider, "https://example.com/voice/webhook");
283
+
284
+ const started = await manager.initiateCall("+15550000003");
285
+ expect(started.success).toBe(true);
286
+
287
+ manager.processEvent({
288
+ id: "evt-closed-loop-answered",
289
+ type: "call.answered",
290
+ callId: started.callId,
291
+ providerCallId: "request-uuid",
292
+ timestamp: Date.now(),
293
+ });
294
+
295
+ const turnPromise = manager.continueCall(started.callId, "How can I help?");
296
+ await new Promise((resolve) => setTimeout(resolve, 0));
297
+
298
+ manager.processEvent({
299
+ id: "evt-closed-loop-speech",
300
+ type: "call.speech",
301
+ callId: started.callId,
302
+ providerCallId: "request-uuid",
303
+ timestamp: Date.now(),
304
+ transcript: "Please check status",
305
+ isFinal: true,
306
+ });
307
+
308
+ const turn = await turnPromise;
309
+ expect(turn.success).toBe(true);
310
+ expect(turn.transcript).toBe("Please check status");
311
+ expect(provider.startListeningCalls).toHaveLength(1);
312
+ expect(provider.stopListeningCalls).toHaveLength(1);
313
+
314
+ const call = manager.getCall(started.callId);
315
+ expect(call?.transcript.map((entry) => entry.text)).toEqual([
316
+ "How can I help?",
317
+ "Please check status",
318
+ ]);
319
+ const metadata = (call?.metadata ?? {}) as Record<string, unknown>;
320
+ expect(typeof metadata.lastTurnLatencyMs).toBe("number");
321
+ expect(typeof metadata.lastTurnListenWaitMs).toBe("number");
322
+ expect(metadata.turnCount).toBe(1);
323
+ });
324
+
325
+ it("rejects overlapping continueCall requests for the same call", async () => {
326
+ const config = VoiceCallConfigSchema.parse({
327
+ enabled: true,
328
+ provider: "plivo",
329
+ fromNumber: "+15550000000",
330
+ transcriptTimeoutMs: 5000,
331
+ });
332
+
333
+ const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
334
+ const provider = new FakeProvider();
335
+ const manager = new CallManager(config, storePath);
336
+ manager.initialize(provider, "https://example.com/voice/webhook");
337
+
338
+ const started = await manager.initiateCall("+15550000004");
339
+ expect(started.success).toBe(true);
340
+
341
+ manager.processEvent({
342
+ id: "evt-overlap-answered",
343
+ type: "call.answered",
344
+ callId: started.callId,
345
+ providerCallId: "request-uuid",
346
+ timestamp: Date.now(),
347
+ });
348
+
349
+ const first = manager.continueCall(started.callId, "First prompt");
350
+ const second = await manager.continueCall(started.callId, "Second prompt");
351
+ expect(second.success).toBe(false);
352
+ expect(second.error).toBe("Already waiting for transcript");
353
+
354
+ manager.processEvent({
355
+ id: "evt-overlap-speech",
356
+ type: "call.speech",
357
+ callId: started.callId,
358
+ providerCallId: "request-uuid",
359
+ timestamp: Date.now(),
360
+ transcript: "Done",
361
+ isFinal: true,
362
+ });
363
+
364
+ const firstResult = await first;
365
+ expect(firstResult.success).toBe(true);
366
+ expect(firstResult.transcript).toBe("Done");
367
+ expect(provider.startListeningCalls).toHaveLength(1);
368
+ expect(provider.stopListeningCalls).toHaveLength(1);
369
+ });
370
+
371
+ it("tracks latency metadata across multiple closed-loop turns", async () => {
372
+ const config = VoiceCallConfigSchema.parse({
373
+ enabled: true,
374
+ provider: "plivo",
375
+ fromNumber: "+15550000000",
376
+ transcriptTimeoutMs: 5000,
377
+ });
378
+
379
+ const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
380
+ const provider = new FakeProvider();
381
+ const manager = new CallManager(config, storePath);
382
+ manager.initialize(provider, "https://example.com/voice/webhook");
383
+
384
+ const started = await manager.initiateCall("+15550000005");
385
+ expect(started.success).toBe(true);
386
+
387
+ manager.processEvent({
388
+ id: "evt-multi-answered",
389
+ type: "call.answered",
390
+ callId: started.callId,
391
+ providerCallId: "request-uuid",
392
+ timestamp: Date.now(),
393
+ });
394
+
395
+ const firstTurn = manager.continueCall(started.callId, "First question");
396
+ await new Promise((resolve) => setTimeout(resolve, 0));
397
+ manager.processEvent({
398
+ id: "evt-multi-speech-1",
399
+ type: "call.speech",
400
+ callId: started.callId,
401
+ providerCallId: "request-uuid",
402
+ timestamp: Date.now(),
403
+ transcript: "First answer",
404
+ isFinal: true,
405
+ });
406
+ await firstTurn;
407
+
408
+ const secondTurn = manager.continueCall(started.callId, "Second question");
409
+ await new Promise((resolve) => setTimeout(resolve, 0));
410
+ manager.processEvent({
411
+ id: "evt-multi-speech-2",
412
+ type: "call.speech",
413
+ callId: started.callId,
414
+ providerCallId: "request-uuid",
415
+ timestamp: Date.now(),
416
+ transcript: "Second answer",
417
+ isFinal: true,
418
+ });
419
+ const secondResult = await secondTurn;
420
+
421
+ expect(secondResult.success).toBe(true);
422
+
423
+ const call = manager.getCall(started.callId);
424
+ expect(call?.transcript.map((entry) => entry.text)).toEqual([
425
+ "First question",
426
+ "First answer",
427
+ "Second question",
428
+ "Second answer",
429
+ ]);
430
+ const metadata = (call?.metadata ?? {}) as Record<string, unknown>;
431
+ expect(metadata.turnCount).toBe(2);
432
+ expect(typeof metadata.lastTurnLatencyMs).toBe("number");
433
+ expect(typeof metadata.lastTurnListenWaitMs).toBe("number");
434
+ expect(provider.startListeningCalls).toHaveLength(2);
435
+ expect(provider.stopListeningCalls).toHaveLength(2);
436
+ });
437
+
438
+ it("handles repeated closed-loop turns without waiter churn", async () => {
439
+ const config = VoiceCallConfigSchema.parse({
440
+ enabled: true,
441
+ provider: "plivo",
442
+ fromNumber: "+15550000000",
443
+ transcriptTimeoutMs: 5000,
444
+ });
445
+
446
+ const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
447
+ const provider = new FakeProvider();
448
+ const manager = new CallManager(config, storePath);
449
+ manager.initialize(provider, "https://example.com/voice/webhook");
450
+
451
+ const started = await manager.initiateCall("+15550000006");
452
+ expect(started.success).toBe(true);
453
+
454
+ manager.processEvent({
455
+ id: "evt-loop-answered",
456
+ type: "call.answered",
457
+ callId: started.callId,
458
+ providerCallId: "request-uuid",
459
+ timestamp: Date.now(),
460
+ });
461
+
462
+ for (let i = 1; i <= 5; i++) {
463
+ const turnPromise = manager.continueCall(started.callId, `Prompt ${i}`);
464
+ await new Promise((resolve) => setTimeout(resolve, 0));
465
+ manager.processEvent({
466
+ id: `evt-loop-speech-${i}`,
467
+ type: "call.speech",
468
+ callId: started.callId,
469
+ providerCallId: "request-uuid",
470
+ timestamp: Date.now(),
471
+ transcript: `Answer ${i}`,
472
+ isFinal: true,
473
+ });
474
+ const result = await turnPromise;
475
+ expect(result.success).toBe(true);
476
+ expect(result.transcript).toBe(`Answer ${i}`);
477
+ }
478
+
479
+ const call = manager.getCall(started.callId);
480
+ const metadata = (call?.metadata ?? {}) as Record<string, unknown>;
481
+ expect(metadata.turnCount).toBe(5);
482
+ expect(provider.startListeningCalls).toHaveLength(5);
483
+ expect(provider.stopListeningCalls).toHaveLength(5);
484
+ });
264
485
  });
package/src/manager.ts CHANGED
@@ -3,8 +3,6 @@ import os from "node:os";
3
3
  import path from "node:path";
4
4
  import type { VoiceCallConfig } from "./config.js";
5
5
  import type { CallManagerContext } from "./manager/context.js";
6
- import type { VoiceCallProvider } from "./providers/base.js";
7
- import type { CallId, CallRecord, NormalizedEvent, OutboundCallOptions } from "./types.js";
8
6
  import { processEvent as processManagerEvent } from "./manager/events.js";
9
7
  import { getCallByProviderCallId as getCallByProviderCallIdFromMaps } from "./manager/lookup.js";
10
8
  import {
@@ -15,6 +13,8 @@ import {
15
13
  speakInitialMessage as speakInitialMessageWithContext,
16
14
  } from "./manager/outbound.js";
17
15
  import { getCallHistoryFromStore, loadActiveCallsFromStore } from "./manager/store.js";
16
+ import type { VoiceCallProvider } from "./providers/base.js";
17
+ import type { CallId, CallRecord, NormalizedEvent, OutboundCallOptions } from "./types.js";
18
18
  import { resolveUserPath } from "./utils.js";
19
19
 
20
20
  function resolveDefaultStoreBase(config: VoiceCallConfig, storePath?: string): string {
@@ -47,6 +47,7 @@ export class CallManager {
47
47
  private config: VoiceCallConfig;
48
48
  private storePath: string;
49
49
  private webhookUrl: string | null = null;
50
+ private activeTurnCalls = new Set<CallId>();
50
51
  private transcriptWaiters = new Map<
51
52
  CallId,
52
53
  {
@@ -137,6 +138,7 @@ export class CallManager {
137
138
  config: this.config,
138
139
  storePath: this.storePath,
139
140
  webhookUrl: this.webhookUrl,
141
+ activeTurnCalls: this.activeTurnCalls,
140
142
  transcriptWaiters: this.transcriptWaiters,
141
143
  maxDurationTimers: this.maxDurationTimers,
142
144
  onCallAnswered: (call) => {
@@ -1,9 +1,9 @@
1
1
  import { describe, expect, it } from "vitest";
2
+ import { MediaStreamHandler } from "./media-stream.js";
2
3
  import type {
3
4
  OpenAIRealtimeSTTProvider,
4
5
  RealtimeSTTSession,
5
6
  } from "./providers/stt-openai-realtime.js";
6
- import { MediaStreamHandler } from "./media-stream.js";
7
7
 
8
8
  const createStubSession = (): RealtimeSTTSession => ({
9
9
  connect: async () => {},
@@ -12,9 +12,9 @@ import type {
12
12
  WebhookContext,
13
13
  WebhookVerificationResult,
14
14
  } from "../types.js";
15
- import type { VoiceCallProvider } from "./base.js";
16
15
  import { escapeXml } from "../voice-mapping.js";
17
16
  import { reconstructWebhookUrl, verifyPlivoWebhook } from "../webhook-security.js";
17
+ import type { VoiceCallProvider } from "./base.js";
18
18
 
19
19
  export interface PlivoProviderOptions {
20
20
  /** Override public URL origin for signature verification */
@@ -22,6 +22,37 @@ function decodeBase64Url(input: string): Buffer {
22
22
  return Buffer.from(padded, "base64");
23
23
  }
24
24
 
25
+ function expectWebhookVerificationSucceeds(params: {
26
+ publicKey: string;
27
+ privateKey: crypto.KeyObject;
28
+ }) {
29
+ const provider = new TelnyxProvider(
30
+ { apiKey: "KEY123", connectionId: "CONN456", publicKey: params.publicKey },
31
+ { skipVerification: false },
32
+ );
33
+
34
+ const rawBody = JSON.stringify({
35
+ event_type: "call.initiated",
36
+ payload: { call_control_id: "x" },
37
+ });
38
+ const timestamp = String(Math.floor(Date.now() / 1000));
39
+ const signedPayload = `${timestamp}|${rawBody}`;
40
+ const signature = crypto
41
+ .sign(null, Buffer.from(signedPayload), params.privateKey)
42
+ .toString("base64");
43
+
44
+ const result = provider.verifyWebhook(
45
+ createCtx({
46
+ rawBody,
47
+ headers: {
48
+ "telnyx-signature-ed25519": signature,
49
+ "telnyx-timestamp": timestamp,
50
+ },
51
+ }),
52
+ );
53
+ expect(result.ok).toBe(true);
54
+ }
55
+
25
56
  describe("TelnyxProvider.verifyWebhook", () => {
26
57
  it("fails closed when public key is missing and skipVerification is false", () => {
27
58
  const provider = new TelnyxProvider(
@@ -63,59 +94,13 @@ describe("TelnyxProvider.verifyWebhook", () => {
63
94
 
64
95
  const rawPublicKey = decodeBase64Url(jwk.x as string);
65
96
  const rawPublicKeyBase64 = rawPublicKey.toString("base64");
66
-
67
- const provider = new TelnyxProvider(
68
- { apiKey: "KEY123", connectionId: "CONN456", publicKey: rawPublicKeyBase64 },
69
- { skipVerification: false },
70
- );
71
-
72
- const rawBody = JSON.stringify({
73
- event_type: "call.initiated",
74
- payload: { call_control_id: "x" },
75
- });
76
- const timestamp = String(Math.floor(Date.now() / 1000));
77
- const signedPayload = `${timestamp}|${rawBody}`;
78
- const signature = crypto.sign(null, Buffer.from(signedPayload), privateKey).toString("base64");
79
-
80
- const result = provider.verifyWebhook(
81
- createCtx({
82
- rawBody,
83
- headers: {
84
- "telnyx-signature-ed25519": signature,
85
- "telnyx-timestamp": timestamp,
86
- },
87
- }),
88
- );
89
- expect(result.ok).toBe(true);
97
+ expectWebhookVerificationSucceeds({ publicKey: rawPublicKeyBase64, privateKey });
90
98
  });
91
99
 
92
100
  it("verifies a valid signature with a DER SPKI public key (Base64)", () => {
93
101
  const { publicKey, privateKey } = crypto.generateKeyPairSync("ed25519");
94
102
  const spkiDer = publicKey.export({ format: "der", type: "spki" }) as Buffer;
95
103
  const spkiDerBase64 = spkiDer.toString("base64");
96
-
97
- const provider = new TelnyxProvider(
98
- { apiKey: "KEY123", connectionId: "CONN456", publicKey: spkiDerBase64 },
99
- { skipVerification: false },
100
- );
101
-
102
- const rawBody = JSON.stringify({
103
- event_type: "call.initiated",
104
- payload: { call_control_id: "x" },
105
- });
106
- const timestamp = String(Math.floor(Date.now() / 1000));
107
- const signedPayload = `${timestamp}|${rawBody}`;
108
- const signature = crypto.sign(null, Buffer.from(signedPayload), privateKey).toString("base64");
109
-
110
- const result = provider.verifyWebhook(
111
- createCtx({
112
- rawBody,
113
- headers: {
114
- "telnyx-signature-ed25519": signature,
115
- "telnyx-timestamp": timestamp,
116
- },
117
- }),
118
- );
119
- expect(result.ok).toBe(true);
104
+ expectWebhookVerificationSucceeds({ publicKey: spkiDerBase64, privateKey });
120
105
  });
121
106
  });
@@ -13,8 +13,8 @@ import type {
13
13
  WebhookContext,
14
14
  WebhookVerificationResult,
15
15
  } from "../types.js";
16
- import type { VoiceCallProvider } from "./base.js";
17
16
  import { verifyTelnyxWebhook } from "../webhook-security.js";
17
+ import type { VoiceCallProvider } from "./base.js";
18
18
 
19
19
  /**
20
20
  * Telnyx Voice API provider implementation.
@@ -1,6 +1,6 @@
1
1
  import type { WebhookContext, WebhookVerificationResult } from "../../types.js";
2
- import type { TwilioProviderOptions } from "../twilio.js";
3
2
  import { verifyTwilioWebhook } from "../../webhook-security.js";
3
+ import type { TwilioProviderOptions } from "../twilio.js";
4
4
 
5
5
  export function verifyTwilioProviderWebhook(params: {
6
6
  ctx: WebhookContext;
@@ -1,6 +1,7 @@
1
1
  import crypto from "node:crypto";
2
2
  import type { TwilioConfig, WebhookSecurityConfig } from "../config.js";
3
3
  import type { MediaStreamHandler } from "../media-stream.js";
4
+ import { chunkAudio } from "../telephony-audio.js";
4
5
  import type { TelephonyTtsProvider } from "../telephony-tts.js";
5
6
  import type {
6
7
  HangupCallInput,
@@ -14,9 +15,8 @@ import type {
14
15
  WebhookContext,
15
16
  WebhookVerificationResult,
16
17
  } from "../types.js";
17
- import type { VoiceCallProvider } from "./base.js";
18
- import { chunkAudio } from "../telephony-audio.js";
19
18
  import { escapeXml, mapVoiceToPolly } from "../voice-mapping.js";
19
+ import type { VoiceCallProvider } from "./base.js";
20
20
  import { twilioApiRequest } from "./twilio/api.js";
21
21
  import { verifyTwilioProviderWebhook } from "./twilio/webhook.js";
22
22
 
package/src/runtime.ts CHANGED
@@ -1,13 +1,13 @@
1
1
  import type { VoiceCallConfig } from "./config.js";
2
- import type { CoreConfig } from "./core-bridge.js";
3
- import type { VoiceCallProvider } from "./providers/base.js";
4
- import type { TelephonyTtsRuntime } from "./telephony-tts.js";
5
2
  import { resolveVoiceCallConfig, validateProviderConfig } from "./config.js";
3
+ import type { CoreConfig } from "./core-bridge.js";
6
4
  import { CallManager } from "./manager.js";
5
+ import type { VoiceCallProvider } from "./providers/base.js";
7
6
  import { MockProvider } from "./providers/mock.js";
8
7
  import { PlivoProvider } from "./providers/plivo.js";
9
8
  import { TelnyxProvider } from "./providers/telnyx.js";
10
9
  import { TwilioProvider } from "./providers/twilio.js";
10
+ import type { TelephonyTtsRuntime } from "./telephony-tts.js";
11
11
  import { createTelephonyTtsProvider } from "./telephony-tts.js";
12
12
  import { startTunnel, type TunnelResult } from "./tunnel.js";
13
13
  import {
@@ -0,0 +1,118 @@
1
+ import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
2
+ import { VoiceCallConfigSchema, type VoiceCallConfig } from "./config.js";
3
+ import type { CallManager } from "./manager.js";
4
+ import type { VoiceCallProvider } from "./providers/base.js";
5
+ import type { CallRecord } from "./types.js";
6
+ import { VoiceCallWebhookServer } from "./webhook.js";
7
+
8
+ const provider: VoiceCallProvider = {
9
+ name: "mock",
10
+ verifyWebhook: () => ({ ok: true }),
11
+ parseWebhookEvent: () => ({ events: [] }),
12
+ initiateCall: async () => ({ providerCallId: "provider-call", status: "initiated" }),
13
+ hangupCall: async () => {},
14
+ playTts: async () => {},
15
+ startListening: async () => {},
16
+ stopListening: async () => {},
17
+ };
18
+
19
+ const createConfig = (overrides: Partial<VoiceCallConfig> = {}): VoiceCallConfig => {
20
+ const base = VoiceCallConfigSchema.parse({});
21
+ base.serve.port = 0;
22
+
23
+ return {
24
+ ...base,
25
+ ...overrides,
26
+ serve: {
27
+ ...base.serve,
28
+ ...(overrides.serve ?? {}),
29
+ },
30
+ };
31
+ };
32
+
33
+ const createCall = (startedAt: number): CallRecord => ({
34
+ callId: "call-1",
35
+ providerCallId: "provider-call-1",
36
+ provider: "mock",
37
+ direction: "outbound",
38
+ state: "initiated",
39
+ from: "+15550001234",
40
+ to: "+15550005678",
41
+ startedAt,
42
+ transcript: [],
43
+ processedEventIds: [],
44
+ });
45
+
46
+ const createManager = (calls: CallRecord[]) => {
47
+ const endCall = vi.fn(async () => ({ success: true }));
48
+ const manager = {
49
+ getActiveCalls: () => calls,
50
+ endCall,
51
+ } as unknown as CallManager;
52
+
53
+ return { manager, endCall };
54
+ };
55
+
56
+ describe("VoiceCallWebhookServer stale call reaper", () => {
57
+ beforeEach(() => {
58
+ vi.useFakeTimers();
59
+ });
60
+
61
+ afterEach(() => {
62
+ vi.useRealTimers();
63
+ });
64
+
65
+ it("ends calls older than staleCallReaperSeconds", async () => {
66
+ const now = new Date("2026-02-16T00:00:00Z");
67
+ vi.setSystemTime(now);
68
+
69
+ const call = createCall(now.getTime() - 120_000);
70
+ const { manager, endCall } = createManager([call]);
71
+ const config = createConfig({ staleCallReaperSeconds: 60 });
72
+ const server = new VoiceCallWebhookServer(config, manager, provider);
73
+
74
+ try {
75
+ await server.start();
76
+ await vi.advanceTimersByTimeAsync(30_000);
77
+ expect(endCall).toHaveBeenCalledWith(call.callId);
78
+ } finally {
79
+ await server.stop();
80
+ }
81
+ });
82
+
83
+ it("skips calls that are younger than the threshold", async () => {
84
+ const now = new Date("2026-02-16T00:00:00Z");
85
+ vi.setSystemTime(now);
86
+
87
+ const call = createCall(now.getTime() - 10_000);
88
+ const { manager, endCall } = createManager([call]);
89
+ const config = createConfig({ staleCallReaperSeconds: 60 });
90
+ const server = new VoiceCallWebhookServer(config, manager, provider);
91
+
92
+ try {
93
+ await server.start();
94
+ await vi.advanceTimersByTimeAsync(30_000);
95
+ expect(endCall).not.toHaveBeenCalled();
96
+ } finally {
97
+ await server.stop();
98
+ }
99
+ });
100
+
101
+ it("does not run when staleCallReaperSeconds is disabled", async () => {
102
+ const now = new Date("2026-02-16T00:00:00Z");
103
+ vi.setSystemTime(now);
104
+
105
+ const call = createCall(now.getTime() - 120_000);
106
+ const { manager, endCall } = createManager([call]);
107
+ const config = createConfig({ staleCallReaperSeconds: 0 });
108
+ const server = new VoiceCallWebhookServer(config, manager, provider);
109
+
110
+ try {
111
+ await server.start();
112
+ await vi.advanceTimersByTimeAsync(60_000);
113
+ expect(endCall).not.toHaveBeenCalled();
114
+ } finally {
115
+ await server.stop();
116
+ }
117
+ });
118
+ });
package/src/webhook.ts CHANGED
@@ -10,11 +10,11 @@ import type { VoiceCallConfig } from "./config.js";
10
10
  import type { CoreConfig } from "./core-bridge.js";
11
11
  import type { CallManager } from "./manager.js";
12
12
  import type { MediaStreamConfig } from "./media-stream.js";
13
+ import { MediaStreamHandler } from "./media-stream.js";
13
14
  import type { VoiceCallProvider } from "./providers/base.js";
15
+ import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js";
14
16
  import type { TwilioProvider } from "./providers/twilio.js";
15
17
  import type { NormalizedEvent, WebhookContext } from "./types.js";
16
- import { MediaStreamHandler } from "./media-stream.js";
17
- import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js";
18
18
 
19
19
  const MAX_WEBHOOK_BODY_BYTES = 1024 * 1024;
20
20
 
@@ -28,6 +28,7 @@ export class VoiceCallWebhookServer {
28
28
  private manager: CallManager;
29
29
  private provider: VoiceCallProvider;
30
30
  private coreConfig: CoreConfig | null;
31
+ private staleCallReaperInterval: ReturnType<typeof setInterval> | null = null;
31
32
 
32
33
  /** Media stream handler for bidirectional audio (when streaming enabled) */
33
34
  private mediaStreamHandler: MediaStreamHandler | null = null;
@@ -151,6 +152,17 @@ export class VoiceCallWebhookServer {
151
152
  },
152
153
  onDisconnect: (callId) => {
153
154
  console.log(`[voice-call] Media stream disconnected: ${callId}`);
155
+ // Auto-end call when media stream disconnects to prevent stuck calls.
156
+ // Without this, calls can remain active indefinitely after the stream closes.
157
+ const disconnectedCall = this.manager.getCallByProviderCallId(callId);
158
+ if (disconnectedCall) {
159
+ console.log(
160
+ `[voice-call] Auto-ending call ${disconnectedCall.callId} on stream disconnect`,
161
+ );
162
+ void this.manager.endCall(disconnectedCall.callId).catch((err) => {
163
+ console.warn(`[voice-call] Failed to auto-end call ${disconnectedCall.callId}:`, err);
164
+ });
165
+ }
154
166
  if (this.provider.name === "twilio") {
155
167
  (this.provider as TwilioProvider).unregisterCallStream(callId);
156
168
  }
@@ -200,14 +212,51 @@ export class VoiceCallWebhookServer {
200
212
  console.log(`[voice-call] Media stream WebSocket on ws://${bind}:${port}${streamPath}`);
201
213
  }
202
214
  resolve(url);
215
+
216
+ // Start the stale call reaper if configured
217
+ this.startStaleCallReaper();
203
218
  });
204
219
  });
205
220
  }
206
221
 
222
+ /**
223
+ * Start a periodic reaper that ends calls older than the configured threshold.
224
+ * Catches calls stuck in unexpected states (e.g., notify-mode calls that never
225
+ * receive a terminal webhook from the provider).
226
+ */
227
+ private startStaleCallReaper(): void {
228
+ const maxAgeSeconds = this.config.staleCallReaperSeconds;
229
+ if (!maxAgeSeconds || maxAgeSeconds <= 0) {
230
+ return;
231
+ }
232
+
233
+ const CHECK_INTERVAL_MS = 30_000; // Check every 30 seconds
234
+ const maxAgeMs = maxAgeSeconds * 1000;
235
+
236
+ this.staleCallReaperInterval = setInterval(() => {
237
+ const now = Date.now();
238
+ for (const call of this.manager.getActiveCalls()) {
239
+ const age = now - call.startedAt;
240
+ if (age > maxAgeMs) {
241
+ console.log(
242
+ `[voice-call] Reaping stale call ${call.callId} (age: ${Math.round(age / 1000)}s, state: ${call.state})`,
243
+ );
244
+ void this.manager.endCall(call.callId).catch((err) => {
245
+ console.warn(`[voice-call] Reaper failed to end call ${call.callId}:`, err);
246
+ });
247
+ }
248
+ }
249
+ }, CHECK_INTERVAL_MS);
250
+ }
251
+
207
252
  /**
208
253
  * Stop the webhook server.
209
254
  */
210
255
  async stop(): Promise<void> {
256
+ if (this.staleCallReaperInterval) {
257
+ clearInterval(this.staleCallReaperInterval);
258
+ this.staleCallReaperInterval = null;
259
+ }
211
260
  return new Promise((resolve) => {
212
261
  if (this.server) {
213
262
  this.server.close(() => {