@openclaw/voice-call 2026.2.1 → 2026.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # Changelog
2
2
 
3
- ## 2026.2.1
3
+ ## 2026.2.2
4
4
 
5
5
  ### Changes
6
6
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openclaw/voice-call",
3
- "version": "2026.2.1",
3
+ "version": "2026.2.2",
4
4
  "description": "OpenClaw voice-call plugin",
5
5
  "type": "module",
6
6
  "dependencies": {
@@ -0,0 +1,19 @@
1
+ export function normalizePhoneNumber(input?: string): string {
2
+ if (!input) {
3
+ return "";
4
+ }
5
+ return input.replace(/\D/g, "");
6
+ }
7
+
8
+ export function isAllowlistedCaller(
9
+ normalizedFrom: string,
10
+ allowFrom: string[] | undefined,
11
+ ): boolean {
12
+ if (!normalizedFrom) {
13
+ return false;
14
+ }
15
+ return (allowFrom ?? []).some((num) => {
16
+ const normalizedAllow = normalizePhoneNumber(num);
17
+ return normalizedAllow !== "" && normalizedAllow === normalizedFrom;
18
+ });
19
+ }
@@ -148,6 +148,34 @@ describe("validateProviderConfig", () => {
148
148
  "plugins.entries.voice-call.config.telnyx.apiKey is required (or set TELNYX_API_KEY env)",
149
149
  );
150
150
  });
151
+
152
+ it("fails validation when allowlist inbound policy lacks public key", () => {
153
+ const config = createBaseConfig("telnyx");
154
+ config.inboundPolicy = "allowlist";
155
+ config.telnyx = { apiKey: "KEY123", connectionId: "CONN456" };
156
+
157
+ const result = validateProviderConfig(config);
158
+
159
+ expect(result.valid).toBe(false);
160
+ expect(result.errors).toContain(
161
+ "plugins.entries.voice-call.config.telnyx.publicKey is required for inboundPolicy allowlist/pairing",
162
+ );
163
+ });
164
+
165
+ it("passes validation when allowlist inbound policy has public key", () => {
166
+ const config = createBaseConfig("telnyx");
167
+ config.inboundPolicy = "allowlist";
168
+ config.telnyx = {
169
+ apiKey: "KEY123",
170
+ connectionId: "CONN456",
171
+ publicKey: "public-key",
172
+ };
173
+
174
+ const result = validateProviderConfig(config);
175
+
176
+ expect(result.valid).toBe(true);
177
+ expect(result.errors).toEqual([]);
178
+ });
151
179
  });
152
180
 
153
181
  describe("plivo provider", () => {
package/src/config.ts CHANGED
@@ -448,6 +448,14 @@ export function validateProviderConfig(config: VoiceCallConfig): {
448
448
  "plugins.entries.voice-call.config.telnyx.connectionId is required (or set TELNYX_CONNECTION_ID env)",
449
449
  );
450
450
  }
451
+ if (
452
+ (config.inboundPolicy === "allowlist" || config.inboundPolicy === "pairing") &&
453
+ !config.telnyx?.publicKey
454
+ ) {
455
+ errors.push(
456
+ "plugins.entries.voice-call.config.telnyx.publicKey is required for inboundPolicy allowlist/pairing",
457
+ );
458
+ }
451
459
  }
452
460
 
453
461
  if (config.provider === "twilio") {
@@ -121,15 +121,29 @@ function resolveOpenClawRoot(): string {
121
121
  throw new Error("Unable to resolve core root. Set OPENCLAW_ROOT to the package root.");
122
122
  }
123
123
 
124
- async function importCoreModule<T>(relativePath: string): Promise<T> {
125
- const root = resolveOpenClawRoot();
126
- const distPath = path.join(root, "dist", relativePath);
124
+ async function importCoreExtensionAPI(): Promise<{
125
+ resolveAgentDir: CoreAgentDeps["resolveAgentDir"];
126
+ resolveAgentWorkspaceDir: CoreAgentDeps["resolveAgentWorkspaceDir"];
127
+ DEFAULT_MODEL: string;
128
+ DEFAULT_PROVIDER: string;
129
+ resolveAgentIdentity: CoreAgentDeps["resolveAgentIdentity"];
130
+ resolveThinkingDefault: CoreAgentDeps["resolveThinkingDefault"];
131
+ runEmbeddedPiAgent: CoreAgentDeps["runEmbeddedPiAgent"];
132
+ resolveAgentTimeoutMs: CoreAgentDeps["resolveAgentTimeoutMs"];
133
+ ensureAgentWorkspace: CoreAgentDeps["ensureAgentWorkspace"];
134
+ resolveStorePath: CoreAgentDeps["resolveStorePath"];
135
+ loadSessionStore: CoreAgentDeps["loadSessionStore"];
136
+ saveSessionStore: CoreAgentDeps["saveSessionStore"];
137
+ resolveSessionFilePath: CoreAgentDeps["resolveSessionFilePath"];
138
+ }> {
139
+ // Do not import any other module. You can't touch this or you will be fired.
140
+ const distPath = path.join(resolveOpenClawRoot(), "dist", "extensionAPI.js");
127
141
  if (!fs.existsSync(distPath)) {
128
142
  throw new Error(
129
143
  `Missing core module at ${distPath}. Run \`pnpm build\` or install the official package.`,
130
144
  );
131
145
  }
132
- return (await import(pathToFileURL(distPath).href)) as T;
146
+ return await import(pathToFileURL(distPath).href);
133
147
  }
134
148
 
135
149
  export async function loadCoreAgentDeps(): Promise<CoreAgentDeps> {
@@ -138,62 +152,7 @@ export async function loadCoreAgentDeps(): Promise<CoreAgentDeps> {
138
152
  }
139
153
 
140
154
  coreDepsPromise = (async () => {
141
- const [
142
- agentScope,
143
- defaults,
144
- identity,
145
- modelSelection,
146
- piEmbedded,
147
- timeout,
148
- workspace,
149
- sessions,
150
- ] = await Promise.all([
151
- importCoreModule<{
152
- resolveAgentDir: CoreAgentDeps["resolveAgentDir"];
153
- resolveAgentWorkspaceDir: CoreAgentDeps["resolveAgentWorkspaceDir"];
154
- }>("agents/agent-scope.js"),
155
- importCoreModule<{
156
- DEFAULT_MODEL: string;
157
- DEFAULT_PROVIDER: string;
158
- }>("agents/defaults.js"),
159
- importCoreModule<{
160
- resolveAgentIdentity: CoreAgentDeps["resolveAgentIdentity"];
161
- }>("agents/identity.js"),
162
- importCoreModule<{
163
- resolveThinkingDefault: CoreAgentDeps["resolveThinkingDefault"];
164
- }>("agents/model-selection.js"),
165
- importCoreModule<{
166
- runEmbeddedPiAgent: CoreAgentDeps["runEmbeddedPiAgent"];
167
- }>("agents/pi-embedded.js"),
168
- importCoreModule<{
169
- resolveAgentTimeoutMs: CoreAgentDeps["resolveAgentTimeoutMs"];
170
- }>("agents/timeout.js"),
171
- importCoreModule<{
172
- ensureAgentWorkspace: CoreAgentDeps["ensureAgentWorkspace"];
173
- }>("agents/workspace.js"),
174
- importCoreModule<{
175
- resolveStorePath: CoreAgentDeps["resolveStorePath"];
176
- loadSessionStore: CoreAgentDeps["loadSessionStore"];
177
- saveSessionStore: CoreAgentDeps["saveSessionStore"];
178
- resolveSessionFilePath: CoreAgentDeps["resolveSessionFilePath"];
179
- }>("config/sessions.js"),
180
- ]);
181
-
182
- return {
183
- resolveAgentDir: agentScope.resolveAgentDir,
184
- resolveAgentWorkspaceDir: agentScope.resolveAgentWorkspaceDir,
185
- resolveAgentIdentity: identity.resolveAgentIdentity,
186
- resolveThinkingDefault: modelSelection.resolveThinkingDefault,
187
- runEmbeddedPiAgent: piEmbedded.runEmbeddedPiAgent,
188
- resolveAgentTimeoutMs: timeout.resolveAgentTimeoutMs,
189
- ensureAgentWorkspace: workspace.ensureAgentWorkspace,
190
- resolveStorePath: sessions.resolveStorePath,
191
- loadSessionStore: sessions.loadSessionStore,
192
- saveSessionStore: sessions.saveSessionStore,
193
- resolveSessionFilePath: sessions.resolveSessionFilePath,
194
- DEFAULT_MODEL: defaults.DEFAULT_MODEL,
195
- DEFAULT_PROVIDER: defaults.DEFAULT_PROVIDER,
196
- };
155
+ return await importCoreExtensionAPI();
197
156
  })();
198
157
 
199
158
  return coreDepsPromise;
@@ -1,6 +1,7 @@
1
1
  import crypto from "node:crypto";
2
2
  import type { CallRecord, CallState, NormalizedEvent } from "../types.js";
3
3
  import type { CallManagerContext } from "./context.js";
4
+ import { isAllowlistedCaller, normalizePhoneNumber } from "../allowlist.js";
4
5
  import { findCall } from "./lookup.js";
5
6
  import { endCall } from "./outbound.js";
6
7
  import { addTranscriptEntry, transitionState } from "./state.js";
@@ -29,11 +30,12 @@ function shouldAcceptInbound(
29
30
 
30
31
  case "allowlist":
31
32
  case "pairing": {
32
- const normalized = from?.replace(/\D/g, "") || "";
33
- const allowed = (allowFrom || []).some((num) => {
34
- const normalizedAllow = num.replace(/\D/g, "");
35
- return normalized.endsWith(normalizedAllow) || normalizedAllow.endsWith(normalized);
36
- });
33
+ const normalized = normalizePhoneNumber(from);
34
+ if (!normalized) {
35
+ console.log("[voice-call] Inbound call rejected: missing caller ID");
36
+ return false;
37
+ }
38
+ const allowed = isAllowlistedCaller(normalized, allowFrom);
37
39
  const status = allowed ? "accepted" : "rejected";
38
40
  console.log(
39
41
  `[voice-call] Inbound call ${status}: ${from} ${allowed ? "is in" : "not in"} allowlist`,
@@ -19,6 +19,7 @@ import { CallManager } from "./manager.js";
19
19
  class FakeProvider implements VoiceCallProvider {
20
20
  readonly name = "plivo" as const;
21
21
  readonly playTtsCalls: PlayTtsInput[] = [];
22
+ readonly hangupCalls: HangupCallInput[] = [];
22
23
 
23
24
  verifyWebhook(_ctx: WebhookContext): WebhookVerificationResult {
24
25
  return { ok: true };
@@ -29,7 +30,9 @@ class FakeProvider implements VoiceCallProvider {
29
30
  async initiateCall(_input: InitiateCallInput): Promise<InitiateCallResult> {
30
31
  return { providerCallId: "request-uuid", status: "initiated" };
31
32
  }
32
- async hangupCall(_input: HangupCallInput): Promise<void> {}
33
+ async hangupCall(input: HangupCallInput): Promise<void> {
34
+ this.hangupCalls.push(input);
35
+ }
33
36
  async playTts(input: PlayTtsInput): Promise<void> {
34
37
  this.playTtsCalls.push(input);
35
38
  }
@@ -102,4 +105,90 @@ describe("CallManager", () => {
102
105
  expect(provider.playTtsCalls).toHaveLength(1);
103
106
  expect(provider.playTtsCalls[0]?.text).toBe("Hello there");
104
107
  });
108
+
109
+ it("rejects inbound calls with missing caller ID when allowlist enabled", () => {
110
+ const config = VoiceCallConfigSchema.parse({
111
+ enabled: true,
112
+ provider: "plivo",
113
+ fromNumber: "+15550000000",
114
+ inboundPolicy: "allowlist",
115
+ allowFrom: ["+15550001234"],
116
+ });
117
+
118
+ const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
119
+ const provider = new FakeProvider();
120
+ const manager = new CallManager(config, storePath);
121
+ manager.initialize(provider, "https://example.com/voice/webhook");
122
+
123
+ manager.processEvent({
124
+ id: "evt-allowlist-missing",
125
+ type: "call.initiated",
126
+ callId: "call-missing",
127
+ providerCallId: "provider-missing",
128
+ timestamp: Date.now(),
129
+ direction: "inbound",
130
+ to: "+15550000000",
131
+ });
132
+
133
+ expect(manager.getCallByProviderCallId("provider-missing")).toBeUndefined();
134
+ expect(provider.hangupCalls).toHaveLength(1);
135
+ expect(provider.hangupCalls[0]?.providerCallId).toBe("provider-missing");
136
+ });
137
+
138
+ it("rejects inbound calls that only match allowlist suffixes", () => {
139
+ const config = VoiceCallConfigSchema.parse({
140
+ enabled: true,
141
+ provider: "plivo",
142
+ fromNumber: "+15550000000",
143
+ inboundPolicy: "allowlist",
144
+ allowFrom: ["+15550001234"],
145
+ });
146
+
147
+ const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
148
+ const provider = new FakeProvider();
149
+ const manager = new CallManager(config, storePath);
150
+ manager.initialize(provider, "https://example.com/voice/webhook");
151
+
152
+ manager.processEvent({
153
+ id: "evt-allowlist-suffix",
154
+ type: "call.initiated",
155
+ callId: "call-suffix",
156
+ providerCallId: "provider-suffix",
157
+ timestamp: Date.now(),
158
+ direction: "inbound",
159
+ from: "+99915550001234",
160
+ to: "+15550000000",
161
+ });
162
+
163
+ expect(manager.getCallByProviderCallId("provider-suffix")).toBeUndefined();
164
+ expect(provider.hangupCalls).toHaveLength(1);
165
+ expect(provider.hangupCalls[0]?.providerCallId).toBe("provider-suffix");
166
+ });
167
+
168
+ it("accepts inbound calls that exactly match the allowlist", () => {
169
+ const config = VoiceCallConfigSchema.parse({
170
+ enabled: true,
171
+ provider: "plivo",
172
+ fromNumber: "+15550000000",
173
+ inboundPolicy: "allowlist",
174
+ allowFrom: ["+15550001234"],
175
+ });
176
+
177
+ const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
178
+ const manager = new CallManager(config, storePath);
179
+ manager.initialize(new FakeProvider(), "https://example.com/voice/webhook");
180
+
181
+ manager.processEvent({
182
+ id: "evt-allowlist-exact",
183
+ type: "call.initiated",
184
+ callId: "call-exact",
185
+ providerCallId: "provider-exact",
186
+ timestamp: Date.now(),
187
+ direction: "inbound",
188
+ from: "+15550001234",
189
+ to: "+15550000000",
190
+ });
191
+
192
+ expect(manager.getCallByProviderCallId("provider-exact")).toBeDefined();
193
+ });
105
194
  });
package/src/manager.ts CHANGED
@@ -5,6 +5,7 @@ import os from "node:os";
5
5
  import path from "node:path";
6
6
  import type { CallMode, VoiceCallConfig } from "./config.js";
7
7
  import type { VoiceCallProvider } from "./providers/base.js";
8
+ import { isAllowlistedCaller, normalizePhoneNumber } from "./allowlist.js";
8
9
  import {
9
10
  type CallId,
10
11
  type CallRecord,
@@ -474,11 +475,12 @@ export class CallManager {
474
475
 
475
476
  case "allowlist":
476
477
  case "pairing": {
477
- const normalized = from?.replace(/\D/g, "") || "";
478
- const allowed = (allowFrom || []).some((num) => {
479
- const normalizedAllow = num.replace(/\D/g, "");
480
- return normalized.endsWith(normalizedAllow) || normalizedAllow.endsWith(normalized);
481
- });
478
+ const normalized = normalizePhoneNumber(from);
479
+ if (!normalized) {
480
+ console.log("[voice-call] Inbound call rejected: missing caller ID");
481
+ return false;
482
+ }
483
+ const allowed = isAllowlistedCaller(normalized, allowFrom);
482
484
  const status = allowed ? "accepted" : "rejected";
483
485
  console.log(
484
486
  `[voice-call] Inbound call ${status}: ${from} ${allowed ? "is in" : "not in"} allowlist`,
@@ -551,7 +553,7 @@ export class CallManager {
551
553
  if (!call && event.direction === "inbound" && event.providerCallId) {
552
554
  // Check if we should accept this inbound call
553
555
  if (!this.shouldAcceptInbound(event.from)) {
554
- // TODO: Could hang up the call here
556
+ void this.rejectInboundCall(event);
555
557
  return;
556
558
  }
557
559
 
@@ -653,6 +655,25 @@ export class CallManager {
653
655
  this.persistCallRecord(call);
654
656
  }
655
657
 
658
+ private async rejectInboundCall(event: NormalizedEvent): Promise<void> {
659
+ if (!this.provider || !event.providerCallId) {
660
+ return;
661
+ }
662
+ const callId = event.callId || event.providerCallId;
663
+ try {
664
+ await this.provider.hangupCall({
665
+ callId,
666
+ providerCallId: event.providerCallId,
667
+ reason: "hangup-bot",
668
+ });
669
+ } catch (err) {
670
+ console.warn(
671
+ `[voice-call] Failed to reject inbound call ${event.providerCallId}:`,
672
+ err instanceof Error ? err.message : err,
673
+ );
674
+ }
675
+ }
676
+
656
677
  private maybeSpeakInitialMessageOnAnswered(call: CallRecord): void {
657
678
  const initialMessage =
658
679
  typeof call.metadata?.initialMessage === "string" ? call.metadata.initialMessage.trim() : "";
@@ -21,6 +21,8 @@ import type {
21
21
  export interface MediaStreamConfig {
22
22
  /** STT provider for transcription */
23
23
  sttProvider: OpenAIRealtimeSTTProvider;
24
+ /** Validate whether to accept a media stream for the given call ID */
25
+ shouldAcceptStream?: (params: { callId: string; streamSid: string; token?: string }) => boolean;
24
26
  /** Callback when transcript is received */
25
27
  onTranscript?: (callId: string, transcript: string) => void;
26
28
  /** Callback for partial transcripts (streaming UI) */
@@ -87,6 +89,7 @@ export class MediaStreamHandler {
87
89
  */
88
90
  private async handleConnection(ws: WebSocket, _request: IncomingMessage): Promise<void> {
89
91
  let session: StreamSession | null = null;
92
+ const streamToken = this.getStreamToken(_request);
90
93
 
91
94
  ws.on("message", async (data: Buffer) => {
92
95
  try {
@@ -98,7 +101,7 @@ export class MediaStreamHandler {
98
101
  break;
99
102
 
100
103
  case "start":
101
- session = await this.handleStart(ws, message);
104
+ session = await this.handleStart(ws, message, streamToken);
102
105
  break;
103
106
 
104
107
  case "media":
@@ -135,11 +138,28 @@ export class MediaStreamHandler {
135
138
  /**
136
139
  * Handle stream start event.
137
140
  */
138
- private async handleStart(ws: WebSocket, message: TwilioMediaMessage): Promise<StreamSession> {
141
+ private async handleStart(
142
+ ws: WebSocket,
143
+ message: TwilioMediaMessage,
144
+ streamToken?: string,
145
+ ): Promise<StreamSession | null> {
139
146
  const streamSid = message.streamSid || "";
140
147
  const callSid = message.start?.callSid || "";
141
148
 
142
149
  console.log(`[MediaStream] Stream started: ${streamSid} (call: ${callSid})`);
150
+ if (!callSid) {
151
+ console.warn("[MediaStream] Missing callSid; closing stream");
152
+ ws.close(1008, "Missing callSid");
153
+ return null;
154
+ }
155
+ if (
156
+ this.config.shouldAcceptStream &&
157
+ !this.config.shouldAcceptStream({ callId: callSid, streamSid, token: streamToken })
158
+ ) {
159
+ console.warn(`[MediaStream] Rejecting stream for unknown call: ${callSid}`);
160
+ ws.close(1008, "Unknown call");
161
+ return null;
162
+ }
143
163
 
144
164
  // Create STT session
145
165
  const sttSession = this.config.sttProvider.createSession();
@@ -189,6 +209,18 @@ export class MediaStreamHandler {
189
209
  this.config.onDisconnect?.(session.callId);
190
210
  }
191
211
 
212
+ private getStreamToken(request: IncomingMessage): string | undefined {
213
+ if (!request.url || !request.headers.host) {
214
+ return undefined;
215
+ }
216
+ try {
217
+ const url = new URL(request.url, `http://${request.headers.host}`);
218
+ return url.searchParams.get("token") ?? undefined;
219
+ } catch {
220
+ return undefined;
221
+ }
222
+ }
223
+
192
224
  /**
193
225
  * Get an active session with an open WebSocket, or undefined if unavailable.
194
226
  */
@@ -21,15 +21,21 @@ import type { VoiceCallProvider } from "./base.js";
21
21
  * Uses Telnyx Call Control API v2 for managing calls.
22
22
  * @see https://developers.telnyx.com/docs/api/v2/call-control
23
23
  */
24
+ export interface TelnyxProviderOptions {
25
+ /** Allow unsigned webhooks when no public key is configured */
26
+ allowUnsignedWebhooks?: boolean;
27
+ }
28
+
24
29
  export class TelnyxProvider implements VoiceCallProvider {
25
30
  readonly name = "telnyx" as const;
26
31
 
27
32
  private readonly apiKey: string;
28
33
  private readonly connectionId: string;
29
34
  private readonly publicKey: string | undefined;
35
+ private readonly options: TelnyxProviderOptions;
30
36
  private readonly baseUrl = "https://api.telnyx.com/v2";
31
37
 
32
- constructor(config: TelnyxConfig) {
38
+ constructor(config: TelnyxConfig, options: TelnyxProviderOptions = {}) {
33
39
  if (!config.apiKey) {
34
40
  throw new Error("Telnyx API key is required");
35
41
  }
@@ -40,6 +46,7 @@ export class TelnyxProvider implements VoiceCallProvider {
40
46
  this.apiKey = config.apiKey;
41
47
  this.connectionId = config.connectionId;
42
48
  this.publicKey = config.publicKey;
49
+ this.options = options;
43
50
  }
44
51
 
45
52
  /**
@@ -76,8 +83,14 @@ export class TelnyxProvider implements VoiceCallProvider {
76
83
  */
77
84
  verifyWebhook(ctx: WebhookContext): WebhookVerificationResult {
78
85
  if (!this.publicKey) {
79
- // No public key configured, skip verification (not recommended for production)
80
- return { ok: true };
86
+ if (this.options.allowUnsignedWebhooks) {
87
+ console.warn("[telnyx] Webhook verification skipped (no public key configured)");
88
+ return { ok: true, reason: "verification skipped (no public key configured)" };
89
+ }
90
+ return {
91
+ ok: false,
92
+ reason: "Missing telnyx.publicKey (configure to verify webhooks)",
93
+ };
81
94
  }
82
95
 
83
96
  const signature = ctx.headers["telnyx-signature-ed25519"];
@@ -2,7 +2,7 @@ import { describe, expect, it } from "vitest";
2
2
  import type { WebhookContext } from "../types.js";
3
3
  import { TwilioProvider } from "./twilio.js";
4
4
 
5
- const STREAM_URL = "wss://example.ngrok.app/voice/stream";
5
+ const STREAM_URL_PREFIX = "wss://example.ngrok.app/voice/stream?token=";
6
6
 
7
7
  function createProvider(): TwilioProvider {
8
8
  return new TwilioProvider(
@@ -24,13 +24,13 @@ function createContext(rawBody: string, query?: WebhookContext["query"]): Webhoo
24
24
  describe("TwilioProvider", () => {
25
25
  it("returns streaming TwiML for outbound conversation calls before in-progress", () => {
26
26
  const provider = createProvider();
27
- const ctx = createContext("CallStatus=initiated&Direction=outbound-api", {
27
+ const ctx = createContext("CallStatus=initiated&Direction=outbound-api&CallSid=CA123", {
28
28
  callId: "call-1",
29
29
  });
30
30
 
31
31
  const result = provider.parseWebhookEvent(ctx);
32
32
 
33
- expect(result.providerResponseBody).toContain(STREAM_URL);
33
+ expect(result.providerResponseBody).toContain(STREAM_URL_PREFIX);
34
34
  expect(result.providerResponseBody).toContain("<Connect>");
35
35
  });
36
36
 
@@ -50,11 +50,11 @@ describe("TwilioProvider", () => {
50
50
 
51
51
  it("returns streaming TwiML for inbound calls", () => {
52
52
  const provider = createProvider();
53
- const ctx = createContext("CallStatus=ringing&Direction=inbound");
53
+ const ctx = createContext("CallStatus=ringing&Direction=inbound&CallSid=CA456");
54
54
 
55
55
  const result = provider.parseWebhookEvent(ctx);
56
56
 
57
- expect(result.providerResponseBody).toContain(STREAM_URL);
57
+ expect(result.providerResponseBody).toContain(STREAM_URL_PREFIX);
58
58
  expect(result.providerResponseBody).toContain("<Connect>");
59
59
  });
60
60
  });
@@ -60,6 +60,8 @@ export class TwilioProvider implements VoiceCallProvider {
60
60
 
61
61
  /** Map of call SID to stream SID for media streams */
62
62
  private callStreamMap = new Map<string, string>();
63
+ /** Per-call tokens for media stream authentication */
64
+ private streamAuthTokens = new Map<string, string>();
63
65
 
64
66
  /** Storage for TwiML content (for notify mode with URL-based TwiML) */
65
67
  private readonly twimlStorage = new Map<string, string>();
@@ -94,6 +96,7 @@ export class TwilioProvider implements VoiceCallProvider {
94
96
  }
95
97
 
96
98
  this.deleteStoredTwiml(callIdMatch[1]);
99
+ this.streamAuthTokens.delete(providerCallId);
97
100
  }
98
101
 
99
102
  constructor(config: TwilioConfig, options: TwilioProviderOptions = {}) {
@@ -138,6 +141,19 @@ export class TwilioProvider implements VoiceCallProvider {
138
141
  this.callStreamMap.delete(callSid);
139
142
  }
140
143
 
144
+ isValidStreamToken(callSid: string, token?: string): boolean {
145
+ const expected = this.streamAuthTokens.get(callSid);
146
+ if (!expected || !token) {
147
+ return false;
148
+ }
149
+ if (expected.length !== token.length) {
150
+ const dummy = Buffer.from(expected);
151
+ crypto.timingSafeEqual(dummy, dummy);
152
+ return false;
153
+ }
154
+ return crypto.timingSafeEqual(Buffer.from(expected), Buffer.from(token));
155
+ }
156
+
141
157
  /**
142
158
  * Clear TTS queue for a call (barge-in).
143
159
  * Used when user starts speaking to interrupt current TTS playback.
@@ -271,11 +287,13 @@ export class TwilioProvider implements VoiceCallProvider {
271
287
  case "busy":
272
288
  case "no-answer":
273
289
  case "failed":
290
+ this.streamAuthTokens.delete(callSid);
274
291
  if (callIdOverride) {
275
292
  this.deleteStoredTwiml(callIdOverride);
276
293
  }
277
294
  return { ...baseEvent, type: "call.ended", reason: callStatus };
278
295
  case "canceled":
296
+ this.streamAuthTokens.delete(callSid);
279
297
  if (callIdOverride) {
280
298
  this.deleteStoredTwiml(callIdOverride);
281
299
  }
@@ -308,6 +326,7 @@ export class TwilioProvider implements VoiceCallProvider {
308
326
  const callStatus = params.get("CallStatus");
309
327
  const direction = params.get("Direction");
310
328
  const isOutbound = direction?.startsWith("outbound") ?? false;
329
+ const callSid = params.get("CallSid") || undefined;
311
330
  const callIdFromQuery =
312
331
  typeof ctx.query?.callId === "string" && ctx.query.callId.trim()
313
332
  ? ctx.query.callId.trim()
@@ -330,7 +349,7 @@ export class TwilioProvider implements VoiceCallProvider {
330
349
 
331
350
  // Conversation mode: return streaming TwiML immediately for outbound calls.
332
351
  if (isOutbound) {
333
- const streamUrl = this.getStreamUrl();
352
+ const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null;
334
353
  return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
335
354
  }
336
355
  }
@@ -343,7 +362,7 @@ export class TwilioProvider implements VoiceCallProvider {
343
362
  // Handle subsequent webhook requests (status callbacks, etc.)
344
363
  // For inbound calls, answer immediately with stream
345
364
  if (direction === "inbound") {
346
- const streamUrl = this.getStreamUrl();
365
+ const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null;
347
366
  return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
348
367
  }
349
368
 
@@ -352,7 +371,7 @@ export class TwilioProvider implements VoiceCallProvider {
352
371
  return TwilioProvider.EMPTY_TWIML;
353
372
  }
354
373
 
355
- const streamUrl = this.getStreamUrl();
374
+ const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null;
356
375
  return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
357
376
  }
358
377
 
@@ -380,6 +399,27 @@ export class TwilioProvider implements VoiceCallProvider {
380
399
  return `${wsOrigin}${path}`;
381
400
  }
382
401
 
402
+ private getStreamAuthToken(callSid: string): string {
403
+ const existing = this.streamAuthTokens.get(callSid);
404
+ if (existing) {
405
+ return existing;
406
+ }
407
+ const token = crypto.randomBytes(16).toString("base64url");
408
+ this.streamAuthTokens.set(callSid, token);
409
+ return token;
410
+ }
411
+
412
+ private getStreamUrlForCall(callSid: string): string | null {
413
+ const baseUrl = this.getStreamUrl();
414
+ if (!baseUrl) {
415
+ return null;
416
+ }
417
+ const token = this.getStreamAuthToken(callSid);
418
+ const url = new URL(baseUrl);
419
+ url.searchParams.set("token", token);
420
+ return url.toString();
421
+ }
422
+
383
423
  /**
384
424
  * Generate TwiML to connect a call to a WebSocket media stream.
385
425
  * This enables bidirectional audio streaming for real-time STT/TTS.
@@ -444,6 +484,7 @@ export class TwilioProvider implements VoiceCallProvider {
444
484
  this.deleteStoredTwimlForProviderCall(input.providerCallId);
445
485
 
446
486
  this.callWebhookUrls.delete(input.providerCallId);
487
+ this.streamAuthTokens.delete(input.providerCallId);
447
488
 
448
489
  await this.apiRequest(
449
490
  `/Calls/${input.providerCallId}.json`,
package/src/runtime.ts CHANGED
@@ -48,11 +48,17 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
48
48
 
49
49
  switch (config.provider) {
50
50
  case "telnyx":
51
- return new TelnyxProvider({
52
- apiKey: config.telnyx?.apiKey,
53
- connectionId: config.telnyx?.connectionId,
54
- publicKey: config.telnyx?.publicKey,
55
- });
51
+ return new TelnyxProvider(
52
+ {
53
+ apiKey: config.telnyx?.apiKey,
54
+ connectionId: config.telnyx?.connectionId,
55
+ publicKey: config.telnyx?.publicKey,
56
+ },
57
+ {
58
+ allowUnsignedWebhooks:
59
+ config.inboundPolicy === "open" || config.inboundPolicy === "disabled",
60
+ },
61
+ );
56
62
  case "twilio":
57
63
  return new TwilioProvider(
58
64
  {
package/src/webhook.ts CHANGED
@@ -11,6 +11,8 @@ import type { NormalizedEvent, WebhookContext } from "./types.js";
11
11
  import { MediaStreamHandler } from "./media-stream.js";
12
12
  import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js";
13
13
 
14
+ const MAX_WEBHOOK_BODY_BYTES = 1024 * 1024;
15
+
14
16
  /**
15
17
  * HTTP server for receiving voice call webhooks from providers.
16
18
  * Supports WebSocket upgrades for media streams when streaming is enabled.
@@ -69,6 +71,20 @@ export class VoiceCallWebhookServer {
69
71
 
70
72
  const streamConfig: MediaStreamConfig = {
71
73
  sttProvider,
74
+ shouldAcceptStream: ({ callId, token }) => {
75
+ const call = this.manager.getCallByProviderCallId(callId);
76
+ if (!call) {
77
+ return false;
78
+ }
79
+ if (this.provider.name === "twilio") {
80
+ const twilio = this.provider as TwilioProvider;
81
+ if (!twilio.isValidStreamToken(callId, token)) {
82
+ console.warn(`[voice-call] Rejecting media stream: invalid token for ${callId}`);
83
+ return false;
84
+ }
85
+ }
86
+ return true;
87
+ },
72
88
  onTranscript: (providerCallId, transcript) => {
73
89
  console.log(`[voice-call] Transcript for ${providerCallId}: ${transcript}`);
74
90
 
@@ -224,7 +240,17 @@ export class VoiceCallWebhookServer {
224
240
  }
225
241
 
226
242
  // Read body
227
- const body = await this.readBody(req);
243
+ let body = "";
244
+ try {
245
+ body = await this.readBody(req, MAX_WEBHOOK_BODY_BYTES);
246
+ } catch (err) {
247
+ if (err instanceof Error && err.message === "PayloadTooLarge") {
248
+ res.statusCode = 413;
249
+ res.end("Payload Too Large");
250
+ return;
251
+ }
252
+ throw err;
253
+ }
228
254
 
229
255
  // Build webhook context
230
256
  const ctx: WebhookContext = {
@@ -272,10 +298,19 @@ export class VoiceCallWebhookServer {
272
298
  /**
273
299
  * Read request body as string.
274
300
  */
275
- private readBody(req: http.IncomingMessage): Promise<string> {
301
+ private readBody(req: http.IncomingMessage, maxBytes: number): Promise<string> {
276
302
  return new Promise((resolve, reject) => {
277
303
  const chunks: Buffer[] = [];
278
- req.on("data", (chunk) => chunks.push(chunk));
304
+ let totalBytes = 0;
305
+ req.on("data", (chunk: Buffer) => {
306
+ totalBytes += chunk.length;
307
+ if (totalBytes > maxBytes) {
308
+ req.destroy();
309
+ reject(new Error("PayloadTooLarge"));
310
+ return;
311
+ }
312
+ chunks.push(chunk);
313
+ });
279
314
  req.on("end", () => resolve(Buffer.concat(chunks).toString("utf-8")));
280
315
  req.on("error", reject);
281
316
  });