@openclaw/voice-call 2026.2.1 → 2026.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +1 -1
- package/package.json +1 -1
- package/src/allowlist.ts +19 -0
- package/src/config.test.ts +28 -0
- package/src/config.ts +8 -0
- package/src/core-bridge.ts +19 -60
- package/src/manager/events.ts +7 -5
- package/src/manager.test.ts +90 -1
- package/src/manager.ts +27 -6
- package/src/media-stream.ts +34 -2
- package/src/providers/telnyx.ts +16 -3
- package/src/providers/twilio.test.ts +5 -5
- package/src/providers/twilio.ts +44 -3
- package/src/runtime.ts +11 -5
- package/src/webhook.ts +38 -3
package/CHANGELOG.md
CHANGED
package/package.json
CHANGED
package/src/allowlist.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
export function normalizePhoneNumber(input?: string): string {
|
|
2
|
+
if (!input) {
|
|
3
|
+
return "";
|
|
4
|
+
}
|
|
5
|
+
return input.replace(/\D/g, "");
|
|
6
|
+
}
|
|
7
|
+
|
|
8
|
+
export function isAllowlistedCaller(
|
|
9
|
+
normalizedFrom: string,
|
|
10
|
+
allowFrom: string[] | undefined,
|
|
11
|
+
): boolean {
|
|
12
|
+
if (!normalizedFrom) {
|
|
13
|
+
return false;
|
|
14
|
+
}
|
|
15
|
+
return (allowFrom ?? []).some((num) => {
|
|
16
|
+
const normalizedAllow = normalizePhoneNumber(num);
|
|
17
|
+
return normalizedAllow !== "" && normalizedAllow === normalizedFrom;
|
|
18
|
+
});
|
|
19
|
+
}
|
package/src/config.test.ts
CHANGED
|
@@ -148,6 +148,34 @@ describe("validateProviderConfig", () => {
|
|
|
148
148
|
"plugins.entries.voice-call.config.telnyx.apiKey is required (or set TELNYX_API_KEY env)",
|
|
149
149
|
);
|
|
150
150
|
});
|
|
151
|
+
|
|
152
|
+
it("fails validation when allowlist inbound policy lacks public key", () => {
|
|
153
|
+
const config = createBaseConfig("telnyx");
|
|
154
|
+
config.inboundPolicy = "allowlist";
|
|
155
|
+
config.telnyx = { apiKey: "KEY123", connectionId: "CONN456" };
|
|
156
|
+
|
|
157
|
+
const result = validateProviderConfig(config);
|
|
158
|
+
|
|
159
|
+
expect(result.valid).toBe(false);
|
|
160
|
+
expect(result.errors).toContain(
|
|
161
|
+
"plugins.entries.voice-call.config.telnyx.publicKey is required for inboundPolicy allowlist/pairing",
|
|
162
|
+
);
|
|
163
|
+
});
|
|
164
|
+
|
|
165
|
+
it("passes validation when allowlist inbound policy has public key", () => {
|
|
166
|
+
const config = createBaseConfig("telnyx");
|
|
167
|
+
config.inboundPolicy = "allowlist";
|
|
168
|
+
config.telnyx = {
|
|
169
|
+
apiKey: "KEY123",
|
|
170
|
+
connectionId: "CONN456",
|
|
171
|
+
publicKey: "public-key",
|
|
172
|
+
};
|
|
173
|
+
|
|
174
|
+
const result = validateProviderConfig(config);
|
|
175
|
+
|
|
176
|
+
expect(result.valid).toBe(true);
|
|
177
|
+
expect(result.errors).toEqual([]);
|
|
178
|
+
});
|
|
151
179
|
});
|
|
152
180
|
|
|
153
181
|
describe("plivo provider", () => {
|
package/src/config.ts
CHANGED
|
@@ -448,6 +448,14 @@ export function validateProviderConfig(config: VoiceCallConfig): {
|
|
|
448
448
|
"plugins.entries.voice-call.config.telnyx.connectionId is required (or set TELNYX_CONNECTION_ID env)",
|
|
449
449
|
);
|
|
450
450
|
}
|
|
451
|
+
if (
|
|
452
|
+
(config.inboundPolicy === "allowlist" || config.inboundPolicy === "pairing") &&
|
|
453
|
+
!config.telnyx?.publicKey
|
|
454
|
+
) {
|
|
455
|
+
errors.push(
|
|
456
|
+
"plugins.entries.voice-call.config.telnyx.publicKey is required for inboundPolicy allowlist/pairing",
|
|
457
|
+
);
|
|
458
|
+
}
|
|
451
459
|
}
|
|
452
460
|
|
|
453
461
|
if (config.provider === "twilio") {
|
package/src/core-bridge.ts
CHANGED
|
@@ -121,15 +121,29 @@ function resolveOpenClawRoot(): string {
|
|
|
121
121
|
throw new Error("Unable to resolve core root. Set OPENCLAW_ROOT to the package root.");
|
|
122
122
|
}
|
|
123
123
|
|
|
124
|
-
async function
|
|
125
|
-
|
|
126
|
-
|
|
124
|
+
async function importCoreExtensionAPI(): Promise<{
|
|
125
|
+
resolveAgentDir: CoreAgentDeps["resolveAgentDir"];
|
|
126
|
+
resolveAgentWorkspaceDir: CoreAgentDeps["resolveAgentWorkspaceDir"];
|
|
127
|
+
DEFAULT_MODEL: string;
|
|
128
|
+
DEFAULT_PROVIDER: string;
|
|
129
|
+
resolveAgentIdentity: CoreAgentDeps["resolveAgentIdentity"];
|
|
130
|
+
resolveThinkingDefault: CoreAgentDeps["resolveThinkingDefault"];
|
|
131
|
+
runEmbeddedPiAgent: CoreAgentDeps["runEmbeddedPiAgent"];
|
|
132
|
+
resolveAgentTimeoutMs: CoreAgentDeps["resolveAgentTimeoutMs"];
|
|
133
|
+
ensureAgentWorkspace: CoreAgentDeps["ensureAgentWorkspace"];
|
|
134
|
+
resolveStorePath: CoreAgentDeps["resolveStorePath"];
|
|
135
|
+
loadSessionStore: CoreAgentDeps["loadSessionStore"];
|
|
136
|
+
saveSessionStore: CoreAgentDeps["saveSessionStore"];
|
|
137
|
+
resolveSessionFilePath: CoreAgentDeps["resolveSessionFilePath"];
|
|
138
|
+
}> {
|
|
139
|
+
// Do not import any other module. You can't touch this or you will be fired.
|
|
140
|
+
const distPath = path.join(resolveOpenClawRoot(), "dist", "extensionAPI.js");
|
|
127
141
|
if (!fs.existsSync(distPath)) {
|
|
128
142
|
throw new Error(
|
|
129
143
|
`Missing core module at ${distPath}. Run \`pnpm build\` or install the official package.`,
|
|
130
144
|
);
|
|
131
145
|
}
|
|
132
|
-
return
|
|
146
|
+
return await import(pathToFileURL(distPath).href);
|
|
133
147
|
}
|
|
134
148
|
|
|
135
149
|
export async function loadCoreAgentDeps(): Promise<CoreAgentDeps> {
|
|
@@ -138,62 +152,7 @@ export async function loadCoreAgentDeps(): Promise<CoreAgentDeps> {
|
|
|
138
152
|
}
|
|
139
153
|
|
|
140
154
|
coreDepsPromise = (async () => {
|
|
141
|
-
|
|
142
|
-
agentScope,
|
|
143
|
-
defaults,
|
|
144
|
-
identity,
|
|
145
|
-
modelSelection,
|
|
146
|
-
piEmbedded,
|
|
147
|
-
timeout,
|
|
148
|
-
workspace,
|
|
149
|
-
sessions,
|
|
150
|
-
] = await Promise.all([
|
|
151
|
-
importCoreModule<{
|
|
152
|
-
resolveAgentDir: CoreAgentDeps["resolveAgentDir"];
|
|
153
|
-
resolveAgentWorkspaceDir: CoreAgentDeps["resolveAgentWorkspaceDir"];
|
|
154
|
-
}>("agents/agent-scope.js"),
|
|
155
|
-
importCoreModule<{
|
|
156
|
-
DEFAULT_MODEL: string;
|
|
157
|
-
DEFAULT_PROVIDER: string;
|
|
158
|
-
}>("agents/defaults.js"),
|
|
159
|
-
importCoreModule<{
|
|
160
|
-
resolveAgentIdentity: CoreAgentDeps["resolveAgentIdentity"];
|
|
161
|
-
}>("agents/identity.js"),
|
|
162
|
-
importCoreModule<{
|
|
163
|
-
resolveThinkingDefault: CoreAgentDeps["resolveThinkingDefault"];
|
|
164
|
-
}>("agents/model-selection.js"),
|
|
165
|
-
importCoreModule<{
|
|
166
|
-
runEmbeddedPiAgent: CoreAgentDeps["runEmbeddedPiAgent"];
|
|
167
|
-
}>("agents/pi-embedded.js"),
|
|
168
|
-
importCoreModule<{
|
|
169
|
-
resolveAgentTimeoutMs: CoreAgentDeps["resolveAgentTimeoutMs"];
|
|
170
|
-
}>("agents/timeout.js"),
|
|
171
|
-
importCoreModule<{
|
|
172
|
-
ensureAgentWorkspace: CoreAgentDeps["ensureAgentWorkspace"];
|
|
173
|
-
}>("agents/workspace.js"),
|
|
174
|
-
importCoreModule<{
|
|
175
|
-
resolveStorePath: CoreAgentDeps["resolveStorePath"];
|
|
176
|
-
loadSessionStore: CoreAgentDeps["loadSessionStore"];
|
|
177
|
-
saveSessionStore: CoreAgentDeps["saveSessionStore"];
|
|
178
|
-
resolveSessionFilePath: CoreAgentDeps["resolveSessionFilePath"];
|
|
179
|
-
}>("config/sessions.js"),
|
|
180
|
-
]);
|
|
181
|
-
|
|
182
|
-
return {
|
|
183
|
-
resolveAgentDir: agentScope.resolveAgentDir,
|
|
184
|
-
resolveAgentWorkspaceDir: agentScope.resolveAgentWorkspaceDir,
|
|
185
|
-
resolveAgentIdentity: identity.resolveAgentIdentity,
|
|
186
|
-
resolveThinkingDefault: modelSelection.resolveThinkingDefault,
|
|
187
|
-
runEmbeddedPiAgent: piEmbedded.runEmbeddedPiAgent,
|
|
188
|
-
resolveAgentTimeoutMs: timeout.resolveAgentTimeoutMs,
|
|
189
|
-
ensureAgentWorkspace: workspace.ensureAgentWorkspace,
|
|
190
|
-
resolveStorePath: sessions.resolveStorePath,
|
|
191
|
-
loadSessionStore: sessions.loadSessionStore,
|
|
192
|
-
saveSessionStore: sessions.saveSessionStore,
|
|
193
|
-
resolveSessionFilePath: sessions.resolveSessionFilePath,
|
|
194
|
-
DEFAULT_MODEL: defaults.DEFAULT_MODEL,
|
|
195
|
-
DEFAULT_PROVIDER: defaults.DEFAULT_PROVIDER,
|
|
196
|
-
};
|
|
155
|
+
return await importCoreExtensionAPI();
|
|
197
156
|
})();
|
|
198
157
|
|
|
199
158
|
return coreDepsPromise;
|
package/src/manager/events.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import crypto from "node:crypto";
|
|
2
2
|
import type { CallRecord, CallState, NormalizedEvent } from "../types.js";
|
|
3
3
|
import type { CallManagerContext } from "./context.js";
|
|
4
|
+
import { isAllowlistedCaller, normalizePhoneNumber } from "../allowlist.js";
|
|
4
5
|
import { findCall } from "./lookup.js";
|
|
5
6
|
import { endCall } from "./outbound.js";
|
|
6
7
|
import { addTranscriptEntry, transitionState } from "./state.js";
|
|
@@ -29,11 +30,12 @@ function shouldAcceptInbound(
|
|
|
29
30
|
|
|
30
31
|
case "allowlist":
|
|
31
32
|
case "pairing": {
|
|
32
|
-
const normalized = from
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
return
|
|
36
|
-
}
|
|
33
|
+
const normalized = normalizePhoneNumber(from);
|
|
34
|
+
if (!normalized) {
|
|
35
|
+
console.log("[voice-call] Inbound call rejected: missing caller ID");
|
|
36
|
+
return false;
|
|
37
|
+
}
|
|
38
|
+
const allowed = isAllowlistedCaller(normalized, allowFrom);
|
|
37
39
|
const status = allowed ? "accepted" : "rejected";
|
|
38
40
|
console.log(
|
|
39
41
|
`[voice-call] Inbound call ${status}: ${from} ${allowed ? "is in" : "not in"} allowlist`,
|
package/src/manager.test.ts
CHANGED
|
@@ -19,6 +19,7 @@ import { CallManager } from "./manager.js";
|
|
|
19
19
|
class FakeProvider implements VoiceCallProvider {
|
|
20
20
|
readonly name = "plivo" as const;
|
|
21
21
|
readonly playTtsCalls: PlayTtsInput[] = [];
|
|
22
|
+
readonly hangupCalls: HangupCallInput[] = [];
|
|
22
23
|
|
|
23
24
|
verifyWebhook(_ctx: WebhookContext): WebhookVerificationResult {
|
|
24
25
|
return { ok: true };
|
|
@@ -29,7 +30,9 @@ class FakeProvider implements VoiceCallProvider {
|
|
|
29
30
|
async initiateCall(_input: InitiateCallInput): Promise<InitiateCallResult> {
|
|
30
31
|
return { providerCallId: "request-uuid", status: "initiated" };
|
|
31
32
|
}
|
|
32
|
-
async hangupCall(
|
|
33
|
+
async hangupCall(input: HangupCallInput): Promise<void> {
|
|
34
|
+
this.hangupCalls.push(input);
|
|
35
|
+
}
|
|
33
36
|
async playTts(input: PlayTtsInput): Promise<void> {
|
|
34
37
|
this.playTtsCalls.push(input);
|
|
35
38
|
}
|
|
@@ -102,4 +105,90 @@ describe("CallManager", () => {
|
|
|
102
105
|
expect(provider.playTtsCalls).toHaveLength(1);
|
|
103
106
|
expect(provider.playTtsCalls[0]?.text).toBe("Hello there");
|
|
104
107
|
});
|
|
108
|
+
|
|
109
|
+
it("rejects inbound calls with missing caller ID when allowlist enabled", () => {
|
|
110
|
+
const config = VoiceCallConfigSchema.parse({
|
|
111
|
+
enabled: true,
|
|
112
|
+
provider: "plivo",
|
|
113
|
+
fromNumber: "+15550000000",
|
|
114
|
+
inboundPolicy: "allowlist",
|
|
115
|
+
allowFrom: ["+15550001234"],
|
|
116
|
+
});
|
|
117
|
+
|
|
118
|
+
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
119
|
+
const provider = new FakeProvider();
|
|
120
|
+
const manager = new CallManager(config, storePath);
|
|
121
|
+
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
122
|
+
|
|
123
|
+
manager.processEvent({
|
|
124
|
+
id: "evt-allowlist-missing",
|
|
125
|
+
type: "call.initiated",
|
|
126
|
+
callId: "call-missing",
|
|
127
|
+
providerCallId: "provider-missing",
|
|
128
|
+
timestamp: Date.now(),
|
|
129
|
+
direction: "inbound",
|
|
130
|
+
to: "+15550000000",
|
|
131
|
+
});
|
|
132
|
+
|
|
133
|
+
expect(manager.getCallByProviderCallId("provider-missing")).toBeUndefined();
|
|
134
|
+
expect(provider.hangupCalls).toHaveLength(1);
|
|
135
|
+
expect(provider.hangupCalls[0]?.providerCallId).toBe("provider-missing");
|
|
136
|
+
});
|
|
137
|
+
|
|
138
|
+
it("rejects inbound calls that only match allowlist suffixes", () => {
|
|
139
|
+
const config = VoiceCallConfigSchema.parse({
|
|
140
|
+
enabled: true,
|
|
141
|
+
provider: "plivo",
|
|
142
|
+
fromNumber: "+15550000000",
|
|
143
|
+
inboundPolicy: "allowlist",
|
|
144
|
+
allowFrom: ["+15550001234"],
|
|
145
|
+
});
|
|
146
|
+
|
|
147
|
+
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
148
|
+
const provider = new FakeProvider();
|
|
149
|
+
const manager = new CallManager(config, storePath);
|
|
150
|
+
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
151
|
+
|
|
152
|
+
manager.processEvent({
|
|
153
|
+
id: "evt-allowlist-suffix",
|
|
154
|
+
type: "call.initiated",
|
|
155
|
+
callId: "call-suffix",
|
|
156
|
+
providerCallId: "provider-suffix",
|
|
157
|
+
timestamp: Date.now(),
|
|
158
|
+
direction: "inbound",
|
|
159
|
+
from: "+99915550001234",
|
|
160
|
+
to: "+15550000000",
|
|
161
|
+
});
|
|
162
|
+
|
|
163
|
+
expect(manager.getCallByProviderCallId("provider-suffix")).toBeUndefined();
|
|
164
|
+
expect(provider.hangupCalls).toHaveLength(1);
|
|
165
|
+
expect(provider.hangupCalls[0]?.providerCallId).toBe("provider-suffix");
|
|
166
|
+
});
|
|
167
|
+
|
|
168
|
+
it("accepts inbound calls that exactly match the allowlist", () => {
|
|
169
|
+
const config = VoiceCallConfigSchema.parse({
|
|
170
|
+
enabled: true,
|
|
171
|
+
provider: "plivo",
|
|
172
|
+
fromNumber: "+15550000000",
|
|
173
|
+
inboundPolicy: "allowlist",
|
|
174
|
+
allowFrom: ["+15550001234"],
|
|
175
|
+
});
|
|
176
|
+
|
|
177
|
+
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
178
|
+
const manager = new CallManager(config, storePath);
|
|
179
|
+
manager.initialize(new FakeProvider(), "https://example.com/voice/webhook");
|
|
180
|
+
|
|
181
|
+
manager.processEvent({
|
|
182
|
+
id: "evt-allowlist-exact",
|
|
183
|
+
type: "call.initiated",
|
|
184
|
+
callId: "call-exact",
|
|
185
|
+
providerCallId: "provider-exact",
|
|
186
|
+
timestamp: Date.now(),
|
|
187
|
+
direction: "inbound",
|
|
188
|
+
from: "+15550001234",
|
|
189
|
+
to: "+15550000000",
|
|
190
|
+
});
|
|
191
|
+
|
|
192
|
+
expect(manager.getCallByProviderCallId("provider-exact")).toBeDefined();
|
|
193
|
+
});
|
|
105
194
|
});
|
package/src/manager.ts
CHANGED
|
@@ -5,6 +5,7 @@ import os from "node:os";
|
|
|
5
5
|
import path from "node:path";
|
|
6
6
|
import type { CallMode, VoiceCallConfig } from "./config.js";
|
|
7
7
|
import type { VoiceCallProvider } from "./providers/base.js";
|
|
8
|
+
import { isAllowlistedCaller, normalizePhoneNumber } from "./allowlist.js";
|
|
8
9
|
import {
|
|
9
10
|
type CallId,
|
|
10
11
|
type CallRecord,
|
|
@@ -474,11 +475,12 @@ export class CallManager {
|
|
|
474
475
|
|
|
475
476
|
case "allowlist":
|
|
476
477
|
case "pairing": {
|
|
477
|
-
const normalized = from
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
return
|
|
481
|
-
}
|
|
478
|
+
const normalized = normalizePhoneNumber(from);
|
|
479
|
+
if (!normalized) {
|
|
480
|
+
console.log("[voice-call] Inbound call rejected: missing caller ID");
|
|
481
|
+
return false;
|
|
482
|
+
}
|
|
483
|
+
const allowed = isAllowlistedCaller(normalized, allowFrom);
|
|
482
484
|
const status = allowed ? "accepted" : "rejected";
|
|
483
485
|
console.log(
|
|
484
486
|
`[voice-call] Inbound call ${status}: ${from} ${allowed ? "is in" : "not in"} allowlist`,
|
|
@@ -551,7 +553,7 @@ export class CallManager {
|
|
|
551
553
|
if (!call && event.direction === "inbound" && event.providerCallId) {
|
|
552
554
|
// Check if we should accept this inbound call
|
|
553
555
|
if (!this.shouldAcceptInbound(event.from)) {
|
|
554
|
-
|
|
556
|
+
void this.rejectInboundCall(event);
|
|
555
557
|
return;
|
|
556
558
|
}
|
|
557
559
|
|
|
@@ -653,6 +655,25 @@ export class CallManager {
|
|
|
653
655
|
this.persistCallRecord(call);
|
|
654
656
|
}
|
|
655
657
|
|
|
658
|
+
private async rejectInboundCall(event: NormalizedEvent): Promise<void> {
|
|
659
|
+
if (!this.provider || !event.providerCallId) {
|
|
660
|
+
return;
|
|
661
|
+
}
|
|
662
|
+
const callId = event.callId || event.providerCallId;
|
|
663
|
+
try {
|
|
664
|
+
await this.provider.hangupCall({
|
|
665
|
+
callId,
|
|
666
|
+
providerCallId: event.providerCallId,
|
|
667
|
+
reason: "hangup-bot",
|
|
668
|
+
});
|
|
669
|
+
} catch (err) {
|
|
670
|
+
console.warn(
|
|
671
|
+
`[voice-call] Failed to reject inbound call ${event.providerCallId}:`,
|
|
672
|
+
err instanceof Error ? err.message : err,
|
|
673
|
+
);
|
|
674
|
+
}
|
|
675
|
+
}
|
|
676
|
+
|
|
656
677
|
private maybeSpeakInitialMessageOnAnswered(call: CallRecord): void {
|
|
657
678
|
const initialMessage =
|
|
658
679
|
typeof call.metadata?.initialMessage === "string" ? call.metadata.initialMessage.trim() : "";
|
package/src/media-stream.ts
CHANGED
|
@@ -21,6 +21,8 @@ import type {
|
|
|
21
21
|
export interface MediaStreamConfig {
|
|
22
22
|
/** STT provider for transcription */
|
|
23
23
|
sttProvider: OpenAIRealtimeSTTProvider;
|
|
24
|
+
/** Validate whether to accept a media stream for the given call ID */
|
|
25
|
+
shouldAcceptStream?: (params: { callId: string; streamSid: string; token?: string }) => boolean;
|
|
24
26
|
/** Callback when transcript is received */
|
|
25
27
|
onTranscript?: (callId: string, transcript: string) => void;
|
|
26
28
|
/** Callback for partial transcripts (streaming UI) */
|
|
@@ -87,6 +89,7 @@ export class MediaStreamHandler {
|
|
|
87
89
|
*/
|
|
88
90
|
private async handleConnection(ws: WebSocket, _request: IncomingMessage): Promise<void> {
|
|
89
91
|
let session: StreamSession | null = null;
|
|
92
|
+
const streamToken = this.getStreamToken(_request);
|
|
90
93
|
|
|
91
94
|
ws.on("message", async (data: Buffer) => {
|
|
92
95
|
try {
|
|
@@ -98,7 +101,7 @@ export class MediaStreamHandler {
|
|
|
98
101
|
break;
|
|
99
102
|
|
|
100
103
|
case "start":
|
|
101
|
-
session = await this.handleStart(ws, message);
|
|
104
|
+
session = await this.handleStart(ws, message, streamToken);
|
|
102
105
|
break;
|
|
103
106
|
|
|
104
107
|
case "media":
|
|
@@ -135,11 +138,28 @@ export class MediaStreamHandler {
|
|
|
135
138
|
/**
|
|
136
139
|
* Handle stream start event.
|
|
137
140
|
*/
|
|
138
|
-
private async handleStart(
|
|
141
|
+
private async handleStart(
|
|
142
|
+
ws: WebSocket,
|
|
143
|
+
message: TwilioMediaMessage,
|
|
144
|
+
streamToken?: string,
|
|
145
|
+
): Promise<StreamSession | null> {
|
|
139
146
|
const streamSid = message.streamSid || "";
|
|
140
147
|
const callSid = message.start?.callSid || "";
|
|
141
148
|
|
|
142
149
|
console.log(`[MediaStream] Stream started: ${streamSid} (call: ${callSid})`);
|
|
150
|
+
if (!callSid) {
|
|
151
|
+
console.warn("[MediaStream] Missing callSid; closing stream");
|
|
152
|
+
ws.close(1008, "Missing callSid");
|
|
153
|
+
return null;
|
|
154
|
+
}
|
|
155
|
+
if (
|
|
156
|
+
this.config.shouldAcceptStream &&
|
|
157
|
+
!this.config.shouldAcceptStream({ callId: callSid, streamSid, token: streamToken })
|
|
158
|
+
) {
|
|
159
|
+
console.warn(`[MediaStream] Rejecting stream for unknown call: ${callSid}`);
|
|
160
|
+
ws.close(1008, "Unknown call");
|
|
161
|
+
return null;
|
|
162
|
+
}
|
|
143
163
|
|
|
144
164
|
// Create STT session
|
|
145
165
|
const sttSession = this.config.sttProvider.createSession();
|
|
@@ -189,6 +209,18 @@ export class MediaStreamHandler {
|
|
|
189
209
|
this.config.onDisconnect?.(session.callId);
|
|
190
210
|
}
|
|
191
211
|
|
|
212
|
+
private getStreamToken(request: IncomingMessage): string | undefined {
|
|
213
|
+
if (!request.url || !request.headers.host) {
|
|
214
|
+
return undefined;
|
|
215
|
+
}
|
|
216
|
+
try {
|
|
217
|
+
const url = new URL(request.url, `http://${request.headers.host}`);
|
|
218
|
+
return url.searchParams.get("token") ?? undefined;
|
|
219
|
+
} catch {
|
|
220
|
+
return undefined;
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
|
|
192
224
|
/**
|
|
193
225
|
* Get an active session with an open WebSocket, or undefined if unavailable.
|
|
194
226
|
*/
|
package/src/providers/telnyx.ts
CHANGED
|
@@ -21,15 +21,21 @@ import type { VoiceCallProvider } from "./base.js";
|
|
|
21
21
|
* Uses Telnyx Call Control API v2 for managing calls.
|
|
22
22
|
* @see https://developers.telnyx.com/docs/api/v2/call-control
|
|
23
23
|
*/
|
|
24
|
+
export interface TelnyxProviderOptions {
|
|
25
|
+
/** Allow unsigned webhooks when no public key is configured */
|
|
26
|
+
allowUnsignedWebhooks?: boolean;
|
|
27
|
+
}
|
|
28
|
+
|
|
24
29
|
export class TelnyxProvider implements VoiceCallProvider {
|
|
25
30
|
readonly name = "telnyx" as const;
|
|
26
31
|
|
|
27
32
|
private readonly apiKey: string;
|
|
28
33
|
private readonly connectionId: string;
|
|
29
34
|
private readonly publicKey: string | undefined;
|
|
35
|
+
private readonly options: TelnyxProviderOptions;
|
|
30
36
|
private readonly baseUrl = "https://api.telnyx.com/v2";
|
|
31
37
|
|
|
32
|
-
constructor(config: TelnyxConfig) {
|
|
38
|
+
constructor(config: TelnyxConfig, options: TelnyxProviderOptions = {}) {
|
|
33
39
|
if (!config.apiKey) {
|
|
34
40
|
throw new Error("Telnyx API key is required");
|
|
35
41
|
}
|
|
@@ -40,6 +46,7 @@ export class TelnyxProvider implements VoiceCallProvider {
|
|
|
40
46
|
this.apiKey = config.apiKey;
|
|
41
47
|
this.connectionId = config.connectionId;
|
|
42
48
|
this.publicKey = config.publicKey;
|
|
49
|
+
this.options = options;
|
|
43
50
|
}
|
|
44
51
|
|
|
45
52
|
/**
|
|
@@ -76,8 +83,14 @@ export class TelnyxProvider implements VoiceCallProvider {
|
|
|
76
83
|
*/
|
|
77
84
|
verifyWebhook(ctx: WebhookContext): WebhookVerificationResult {
|
|
78
85
|
if (!this.publicKey) {
|
|
79
|
-
|
|
80
|
-
|
|
86
|
+
if (this.options.allowUnsignedWebhooks) {
|
|
87
|
+
console.warn("[telnyx] Webhook verification skipped (no public key configured)");
|
|
88
|
+
return { ok: true, reason: "verification skipped (no public key configured)" };
|
|
89
|
+
}
|
|
90
|
+
return {
|
|
91
|
+
ok: false,
|
|
92
|
+
reason: "Missing telnyx.publicKey (configure to verify webhooks)",
|
|
93
|
+
};
|
|
81
94
|
}
|
|
82
95
|
|
|
83
96
|
const signature = ctx.headers["telnyx-signature-ed25519"];
|
|
@@ -2,7 +2,7 @@ import { describe, expect, it } from "vitest";
|
|
|
2
2
|
import type { WebhookContext } from "../types.js";
|
|
3
3
|
import { TwilioProvider } from "./twilio.js";
|
|
4
4
|
|
|
5
|
-
const
|
|
5
|
+
const STREAM_URL_PREFIX = "wss://example.ngrok.app/voice/stream?token=";
|
|
6
6
|
|
|
7
7
|
function createProvider(): TwilioProvider {
|
|
8
8
|
return new TwilioProvider(
|
|
@@ -24,13 +24,13 @@ function createContext(rawBody: string, query?: WebhookContext["query"]): Webhoo
|
|
|
24
24
|
describe("TwilioProvider", () => {
|
|
25
25
|
it("returns streaming TwiML for outbound conversation calls before in-progress", () => {
|
|
26
26
|
const provider = createProvider();
|
|
27
|
-
const ctx = createContext("CallStatus=initiated&Direction=outbound-api", {
|
|
27
|
+
const ctx = createContext("CallStatus=initiated&Direction=outbound-api&CallSid=CA123", {
|
|
28
28
|
callId: "call-1",
|
|
29
29
|
});
|
|
30
30
|
|
|
31
31
|
const result = provider.parseWebhookEvent(ctx);
|
|
32
32
|
|
|
33
|
-
expect(result.providerResponseBody).toContain(
|
|
33
|
+
expect(result.providerResponseBody).toContain(STREAM_URL_PREFIX);
|
|
34
34
|
expect(result.providerResponseBody).toContain("<Connect>");
|
|
35
35
|
});
|
|
36
36
|
|
|
@@ -50,11 +50,11 @@ describe("TwilioProvider", () => {
|
|
|
50
50
|
|
|
51
51
|
it("returns streaming TwiML for inbound calls", () => {
|
|
52
52
|
const provider = createProvider();
|
|
53
|
-
const ctx = createContext("CallStatus=ringing&Direction=inbound");
|
|
53
|
+
const ctx = createContext("CallStatus=ringing&Direction=inbound&CallSid=CA456");
|
|
54
54
|
|
|
55
55
|
const result = provider.parseWebhookEvent(ctx);
|
|
56
56
|
|
|
57
|
-
expect(result.providerResponseBody).toContain(
|
|
57
|
+
expect(result.providerResponseBody).toContain(STREAM_URL_PREFIX);
|
|
58
58
|
expect(result.providerResponseBody).toContain("<Connect>");
|
|
59
59
|
});
|
|
60
60
|
});
|
package/src/providers/twilio.ts
CHANGED
|
@@ -60,6 +60,8 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
60
60
|
|
|
61
61
|
/** Map of call SID to stream SID for media streams */
|
|
62
62
|
private callStreamMap = new Map<string, string>();
|
|
63
|
+
/** Per-call tokens for media stream authentication */
|
|
64
|
+
private streamAuthTokens = new Map<string, string>();
|
|
63
65
|
|
|
64
66
|
/** Storage for TwiML content (for notify mode with URL-based TwiML) */
|
|
65
67
|
private readonly twimlStorage = new Map<string, string>();
|
|
@@ -94,6 +96,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
94
96
|
}
|
|
95
97
|
|
|
96
98
|
this.deleteStoredTwiml(callIdMatch[1]);
|
|
99
|
+
this.streamAuthTokens.delete(providerCallId);
|
|
97
100
|
}
|
|
98
101
|
|
|
99
102
|
constructor(config: TwilioConfig, options: TwilioProviderOptions = {}) {
|
|
@@ -138,6 +141,19 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
138
141
|
this.callStreamMap.delete(callSid);
|
|
139
142
|
}
|
|
140
143
|
|
|
144
|
+
isValidStreamToken(callSid: string, token?: string): boolean {
|
|
145
|
+
const expected = this.streamAuthTokens.get(callSid);
|
|
146
|
+
if (!expected || !token) {
|
|
147
|
+
return false;
|
|
148
|
+
}
|
|
149
|
+
if (expected.length !== token.length) {
|
|
150
|
+
const dummy = Buffer.from(expected);
|
|
151
|
+
crypto.timingSafeEqual(dummy, dummy);
|
|
152
|
+
return false;
|
|
153
|
+
}
|
|
154
|
+
return crypto.timingSafeEqual(Buffer.from(expected), Buffer.from(token));
|
|
155
|
+
}
|
|
156
|
+
|
|
141
157
|
/**
|
|
142
158
|
* Clear TTS queue for a call (barge-in).
|
|
143
159
|
* Used when user starts speaking to interrupt current TTS playback.
|
|
@@ -271,11 +287,13 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
271
287
|
case "busy":
|
|
272
288
|
case "no-answer":
|
|
273
289
|
case "failed":
|
|
290
|
+
this.streamAuthTokens.delete(callSid);
|
|
274
291
|
if (callIdOverride) {
|
|
275
292
|
this.deleteStoredTwiml(callIdOverride);
|
|
276
293
|
}
|
|
277
294
|
return { ...baseEvent, type: "call.ended", reason: callStatus };
|
|
278
295
|
case "canceled":
|
|
296
|
+
this.streamAuthTokens.delete(callSid);
|
|
279
297
|
if (callIdOverride) {
|
|
280
298
|
this.deleteStoredTwiml(callIdOverride);
|
|
281
299
|
}
|
|
@@ -308,6 +326,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
308
326
|
const callStatus = params.get("CallStatus");
|
|
309
327
|
const direction = params.get("Direction");
|
|
310
328
|
const isOutbound = direction?.startsWith("outbound") ?? false;
|
|
329
|
+
const callSid = params.get("CallSid") || undefined;
|
|
311
330
|
const callIdFromQuery =
|
|
312
331
|
typeof ctx.query?.callId === "string" && ctx.query.callId.trim()
|
|
313
332
|
? ctx.query.callId.trim()
|
|
@@ -330,7 +349,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
330
349
|
|
|
331
350
|
// Conversation mode: return streaming TwiML immediately for outbound calls.
|
|
332
351
|
if (isOutbound) {
|
|
333
|
-
const streamUrl = this.
|
|
352
|
+
const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null;
|
|
334
353
|
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
|
|
335
354
|
}
|
|
336
355
|
}
|
|
@@ -343,7 +362,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
343
362
|
// Handle subsequent webhook requests (status callbacks, etc.)
|
|
344
363
|
// For inbound calls, answer immediately with stream
|
|
345
364
|
if (direction === "inbound") {
|
|
346
|
-
const streamUrl = this.
|
|
365
|
+
const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null;
|
|
347
366
|
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
|
|
348
367
|
}
|
|
349
368
|
|
|
@@ -352,7 +371,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
352
371
|
return TwilioProvider.EMPTY_TWIML;
|
|
353
372
|
}
|
|
354
373
|
|
|
355
|
-
const streamUrl = this.
|
|
374
|
+
const streamUrl = callSid ? this.getStreamUrlForCall(callSid) : null;
|
|
356
375
|
return streamUrl ? this.getStreamConnectXml(streamUrl) : TwilioProvider.PAUSE_TWIML;
|
|
357
376
|
}
|
|
358
377
|
|
|
@@ -380,6 +399,27 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
380
399
|
return `${wsOrigin}${path}`;
|
|
381
400
|
}
|
|
382
401
|
|
|
402
|
+
private getStreamAuthToken(callSid: string): string {
|
|
403
|
+
const existing = this.streamAuthTokens.get(callSid);
|
|
404
|
+
if (existing) {
|
|
405
|
+
return existing;
|
|
406
|
+
}
|
|
407
|
+
const token = crypto.randomBytes(16).toString("base64url");
|
|
408
|
+
this.streamAuthTokens.set(callSid, token);
|
|
409
|
+
return token;
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
private getStreamUrlForCall(callSid: string): string | null {
|
|
413
|
+
const baseUrl = this.getStreamUrl();
|
|
414
|
+
if (!baseUrl) {
|
|
415
|
+
return null;
|
|
416
|
+
}
|
|
417
|
+
const token = this.getStreamAuthToken(callSid);
|
|
418
|
+
const url = new URL(baseUrl);
|
|
419
|
+
url.searchParams.set("token", token);
|
|
420
|
+
return url.toString();
|
|
421
|
+
}
|
|
422
|
+
|
|
383
423
|
/**
|
|
384
424
|
* Generate TwiML to connect a call to a WebSocket media stream.
|
|
385
425
|
* This enables bidirectional audio streaming for real-time STT/TTS.
|
|
@@ -444,6 +484,7 @@ export class TwilioProvider implements VoiceCallProvider {
|
|
|
444
484
|
this.deleteStoredTwimlForProviderCall(input.providerCallId);
|
|
445
485
|
|
|
446
486
|
this.callWebhookUrls.delete(input.providerCallId);
|
|
487
|
+
this.streamAuthTokens.delete(input.providerCallId);
|
|
447
488
|
|
|
448
489
|
await this.apiRequest(
|
|
449
490
|
`/Calls/${input.providerCallId}.json`,
|
package/src/runtime.ts
CHANGED
|
@@ -48,11 +48,17 @@ function resolveProvider(config: VoiceCallConfig): VoiceCallProvider {
|
|
|
48
48
|
|
|
49
49
|
switch (config.provider) {
|
|
50
50
|
case "telnyx":
|
|
51
|
-
return new TelnyxProvider(
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
51
|
+
return new TelnyxProvider(
|
|
52
|
+
{
|
|
53
|
+
apiKey: config.telnyx?.apiKey,
|
|
54
|
+
connectionId: config.telnyx?.connectionId,
|
|
55
|
+
publicKey: config.telnyx?.publicKey,
|
|
56
|
+
},
|
|
57
|
+
{
|
|
58
|
+
allowUnsignedWebhooks:
|
|
59
|
+
config.inboundPolicy === "open" || config.inboundPolicy === "disabled",
|
|
60
|
+
},
|
|
61
|
+
);
|
|
56
62
|
case "twilio":
|
|
57
63
|
return new TwilioProvider(
|
|
58
64
|
{
|
package/src/webhook.ts
CHANGED
|
@@ -11,6 +11,8 @@ import type { NormalizedEvent, WebhookContext } from "./types.js";
|
|
|
11
11
|
import { MediaStreamHandler } from "./media-stream.js";
|
|
12
12
|
import { OpenAIRealtimeSTTProvider } from "./providers/stt-openai-realtime.js";
|
|
13
13
|
|
|
14
|
+
const MAX_WEBHOOK_BODY_BYTES = 1024 * 1024;
|
|
15
|
+
|
|
14
16
|
/**
|
|
15
17
|
* HTTP server for receiving voice call webhooks from providers.
|
|
16
18
|
* Supports WebSocket upgrades for media streams when streaming is enabled.
|
|
@@ -69,6 +71,20 @@ export class VoiceCallWebhookServer {
|
|
|
69
71
|
|
|
70
72
|
const streamConfig: MediaStreamConfig = {
|
|
71
73
|
sttProvider,
|
|
74
|
+
shouldAcceptStream: ({ callId, token }) => {
|
|
75
|
+
const call = this.manager.getCallByProviderCallId(callId);
|
|
76
|
+
if (!call) {
|
|
77
|
+
return false;
|
|
78
|
+
}
|
|
79
|
+
if (this.provider.name === "twilio") {
|
|
80
|
+
const twilio = this.provider as TwilioProvider;
|
|
81
|
+
if (!twilio.isValidStreamToken(callId, token)) {
|
|
82
|
+
console.warn(`[voice-call] Rejecting media stream: invalid token for ${callId}`);
|
|
83
|
+
return false;
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
return true;
|
|
87
|
+
},
|
|
72
88
|
onTranscript: (providerCallId, transcript) => {
|
|
73
89
|
console.log(`[voice-call] Transcript for ${providerCallId}: ${transcript}`);
|
|
74
90
|
|
|
@@ -224,7 +240,17 @@ export class VoiceCallWebhookServer {
|
|
|
224
240
|
}
|
|
225
241
|
|
|
226
242
|
// Read body
|
|
227
|
-
|
|
243
|
+
let body = "";
|
|
244
|
+
try {
|
|
245
|
+
body = await this.readBody(req, MAX_WEBHOOK_BODY_BYTES);
|
|
246
|
+
} catch (err) {
|
|
247
|
+
if (err instanceof Error && err.message === "PayloadTooLarge") {
|
|
248
|
+
res.statusCode = 413;
|
|
249
|
+
res.end("Payload Too Large");
|
|
250
|
+
return;
|
|
251
|
+
}
|
|
252
|
+
throw err;
|
|
253
|
+
}
|
|
228
254
|
|
|
229
255
|
// Build webhook context
|
|
230
256
|
const ctx: WebhookContext = {
|
|
@@ -272,10 +298,19 @@ export class VoiceCallWebhookServer {
|
|
|
272
298
|
/**
|
|
273
299
|
* Read request body as string.
|
|
274
300
|
*/
|
|
275
|
-
private readBody(req: http.IncomingMessage): Promise<string> {
|
|
301
|
+
private readBody(req: http.IncomingMessage, maxBytes: number): Promise<string> {
|
|
276
302
|
return new Promise((resolve, reject) => {
|
|
277
303
|
const chunks: Buffer[] = [];
|
|
278
|
-
|
|
304
|
+
let totalBytes = 0;
|
|
305
|
+
req.on("data", (chunk: Buffer) => {
|
|
306
|
+
totalBytes += chunk.length;
|
|
307
|
+
if (totalBytes > maxBytes) {
|
|
308
|
+
req.destroy();
|
|
309
|
+
reject(new Error("PayloadTooLarge"));
|
|
310
|
+
return;
|
|
311
|
+
}
|
|
312
|
+
chunks.push(chunk);
|
|
313
|
+
});
|
|
279
314
|
req.on("end", () => resolve(Buffer.concat(chunks).toString("utf-8")));
|
|
280
315
|
req.on("error", reject);
|
|
281
316
|
});
|