@openclaw/voice-call 2026.2.21 → 2026.2.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +11 -0
- package/package.json +1 -1
- package/src/config.test.ts +4 -0
- package/src/config.ts +15 -0
- package/src/manager/events.test.ts +38 -52
- package/src/manager/outbound.ts +51 -31
- package/src/manager.test.ts +50 -127
- package/src/media-stream.test.ts +175 -0
- package/src/media-stream.ts +110 -0
- package/src/webhook.ts +15 -3
package/CHANGELOG.md
CHANGED
package/README.md
CHANGED
|
@@ -76,6 +76,10 @@ Put under `plugins.entries.voice-call.config`:
|
|
|
76
76
|
streaming: {
|
|
77
77
|
enabled: true,
|
|
78
78
|
streamPath: "/voice/stream",
|
|
79
|
+
preStartTimeoutMs: 5000,
|
|
80
|
+
maxPendingConnections: 32,
|
|
81
|
+
maxPendingConnectionsPerIp: 4,
|
|
82
|
+
maxConnections: 128,
|
|
79
83
|
},
|
|
80
84
|
}
|
|
81
85
|
```
|
|
@@ -87,6 +91,13 @@ Notes:
|
|
|
87
91
|
- Telnyx requires `telnyx.publicKey` (or `TELNYX_PUBLIC_KEY`) unless `skipSignatureVerification` is true.
|
|
88
92
|
- `tunnel.allowNgrokFreeTierLoopbackBypass: true` allows Twilio webhooks with invalid signatures **only** when `tunnel.provider="ngrok"` and `serve.bind` is loopback (ngrok local agent). Use for local dev only.
|
|
89
93
|
|
|
94
|
+
Streaming security defaults:
|
|
95
|
+
|
|
96
|
+
- `streaming.preStartTimeoutMs` closes sockets that never send a valid `start` frame.
|
|
97
|
+
- `streaming.maxPendingConnections` caps total unauthenticated pre-start sockets.
|
|
98
|
+
- `streaming.maxPendingConnectionsPerIp` caps unauthenticated pre-start sockets per source IP.
|
|
99
|
+
- `streaming.maxConnections` caps total open media stream sockets (pending + active).
|
|
100
|
+
|
|
90
101
|
## Stale call reaper
|
|
91
102
|
|
|
92
103
|
Use `staleCallReaperSeconds` to end calls that never receive a terminal webhook
|
package/package.json
CHANGED
package/src/config.test.ts
CHANGED
|
@@ -30,6 +30,10 @@ function createBaseConfig(provider: "telnyx" | "twilio" | "plivo" | "mock"): Voi
|
|
|
30
30
|
silenceDurationMs: 800,
|
|
31
31
|
vadThreshold: 0.5,
|
|
32
32
|
streamPath: "/voice/stream",
|
|
33
|
+
preStartTimeoutMs: 5000,
|
|
34
|
+
maxPendingConnections: 32,
|
|
35
|
+
maxPendingConnectionsPerIp: 4,
|
|
36
|
+
maxConnections: 128,
|
|
33
37
|
},
|
|
34
38
|
skipSignatureVerification: false,
|
|
35
39
|
stt: { provider: "openai", model: "whisper-1" },
|
package/src/config.ts
CHANGED
|
@@ -219,6 +219,17 @@ export const VoiceCallStreamingConfigSchema = z
|
|
|
219
219
|
vadThreshold: z.number().min(0).max(1).default(0.5),
|
|
220
220
|
/** WebSocket path for media stream connections */
|
|
221
221
|
streamPath: z.string().min(1).default("/voice/stream"),
|
|
222
|
+
/**
|
|
223
|
+
* Close unauthenticated media stream sockets if no valid `start` frame arrives in time.
|
|
224
|
+
* Protects against pre-auth idle connection hold attacks.
|
|
225
|
+
*/
|
|
226
|
+
preStartTimeoutMs: z.number().int().positive().default(5000),
|
|
227
|
+
/** Maximum number of concurrently pending (pre-start) media stream sockets. */
|
|
228
|
+
maxPendingConnections: z.number().int().positive().default(32),
|
|
229
|
+
/** Maximum pending media stream sockets per source IP. */
|
|
230
|
+
maxPendingConnectionsPerIp: z.number().int().positive().default(4),
|
|
231
|
+
/** Hard cap for all open media stream sockets (pending + active). */
|
|
232
|
+
maxConnections: z.number().int().positive().default(128),
|
|
222
233
|
})
|
|
223
234
|
.strict()
|
|
224
235
|
.default({
|
|
@@ -228,6 +239,10 @@ export const VoiceCallStreamingConfigSchema = z
|
|
|
228
239
|
silenceDurationMs: 800,
|
|
229
240
|
vadThreshold: 0.5,
|
|
230
241
|
streamPath: "/voice/stream",
|
|
242
|
+
preStartTimeoutMs: 5000,
|
|
243
|
+
maxPendingConnections: 32,
|
|
244
|
+
maxPendingConnectionsPerIp: 4,
|
|
245
|
+
maxConnections: 128,
|
|
231
246
|
});
|
|
232
247
|
export type VoiceCallStreamingConfig = z.infer<typeof VoiceCallStreamingConfigSchema>;
|
|
233
248
|
|
|
package/src/manager/events.test.ts
CHANGED
|
@@ -45,6 +45,32 @@ function createProvider(overrides: Partial<VoiceCallProvider> = {}): VoiceCallPr
|
|
|
45
45
|
};
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
+
function createInboundDisabledConfig() {
|
|
49
|
+
return VoiceCallConfigSchema.parse({
|
|
50
|
+
enabled: true,
|
|
51
|
+
provider: "plivo",
|
|
52
|
+
fromNumber: "+15550000000",
|
|
53
|
+
inboundPolicy: "disabled",
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
function createInboundInitiatedEvent(params: {
|
|
58
|
+
id: string;
|
|
59
|
+
providerCallId: string;
|
|
60
|
+
from: string;
|
|
61
|
+
}): NormalizedEvent {
|
|
62
|
+
return {
|
|
63
|
+
id: params.id,
|
|
64
|
+
type: "call.initiated",
|
|
65
|
+
callId: params.providerCallId,
|
|
66
|
+
providerCallId: params.providerCallId,
|
|
67
|
+
timestamp: Date.now(),
|
|
68
|
+
direction: "inbound",
|
|
69
|
+
from: params.from,
|
|
70
|
+
to: "+15550000000",
|
|
71
|
+
};
|
|
72
|
+
}
|
|
73
|
+
|
|
48
74
|
describe("processEvent (functional)", () => {
|
|
49
75
|
it("calls provider hangup when rejecting inbound call", () => {
|
|
50
76
|
const hangupCalls: HangupCallInput[] = [];
|
|
@@ -55,24 +81,14 @@ describe("processEvent (functional)", () => {
|
|
|
55
81
|
});
|
|
56
82
|
|
|
57
83
|
const ctx = createContext({
|
|
58
|
-
config:
|
|
59
|
-
enabled: true,
|
|
60
|
-
provider: "plivo",
|
|
61
|
-
fromNumber: "+15550000000",
|
|
62
|
-
inboundPolicy: "disabled",
|
|
63
|
-
}),
|
|
84
|
+
config: createInboundDisabledConfig(),
|
|
64
85
|
provider,
|
|
65
86
|
});
|
|
66
|
-
const event
|
|
87
|
+
const event = createInboundInitiatedEvent({
|
|
67
88
|
id: "evt-1",
|
|
68
|
-
type: "call.initiated",
|
|
69
|
-
callId: "prov-1",
|
|
70
89
|
providerCallId: "prov-1",
|
|
71
|
-
timestamp: Date.now(),
|
|
72
|
-
direction: "inbound",
|
|
73
90
|
from: "+15559999999",
|
|
74
|
-
|
|
75
|
-
};
|
|
91
|
+
});
|
|
76
92
|
|
|
77
93
|
processEvent(ctx, event);
|
|
78
94
|
|
|
@@ -87,24 +103,14 @@ describe("processEvent (functional)", () => {
|
|
|
87
103
|
|
|
88
104
|
it("does not call hangup when provider is null", () => {
|
|
89
105
|
const ctx = createContext({
|
|
90
|
-
config:
|
|
91
|
-
enabled: true,
|
|
92
|
-
provider: "plivo",
|
|
93
|
-
fromNumber: "+15550000000",
|
|
94
|
-
inboundPolicy: "disabled",
|
|
95
|
-
}),
|
|
106
|
+
config: createInboundDisabledConfig(),
|
|
96
107
|
provider: null,
|
|
97
108
|
});
|
|
98
|
-
const event
|
|
109
|
+
const event = createInboundInitiatedEvent({
|
|
99
110
|
id: "evt-2",
|
|
100
|
-
type: "call.initiated",
|
|
101
|
-
callId: "prov-2",
|
|
102
111
|
providerCallId: "prov-2",
|
|
103
|
-
timestamp: Date.now(),
|
|
104
|
-
direction: "inbound",
|
|
105
112
|
from: "+15551111111",
|
|
106
|
-
|
|
107
|
-
};
|
|
113
|
+
});
|
|
108
114
|
|
|
109
115
|
processEvent(ctx, event);
|
|
110
116
|
|
|
@@ -119,24 +125,14 @@ describe("processEvent (functional)", () => {
|
|
|
119
125
|
},
|
|
120
126
|
});
|
|
121
127
|
const ctx = createContext({
|
|
122
|
-
config:
|
|
123
|
-
enabled: true,
|
|
124
|
-
provider: "plivo",
|
|
125
|
-
fromNumber: "+15550000000",
|
|
126
|
-
inboundPolicy: "disabled",
|
|
127
|
-
}),
|
|
128
|
+
config: createInboundDisabledConfig(),
|
|
128
129
|
provider,
|
|
129
130
|
});
|
|
130
|
-
const event1
|
|
131
|
+
const event1 = createInboundInitiatedEvent({
|
|
131
132
|
id: "evt-init",
|
|
132
|
-
type: "call.initiated",
|
|
133
|
-
callId: "prov-dup",
|
|
134
133
|
providerCallId: "prov-dup",
|
|
135
|
-
timestamp: Date.now(),
|
|
136
|
-
direction: "inbound",
|
|
137
134
|
from: "+15552222222",
|
|
138
|
-
|
|
139
|
-
};
|
|
135
|
+
});
|
|
140
136
|
const event2: NormalizedEvent = {
|
|
141
137
|
id: "evt-ring",
|
|
142
138
|
type: "call.ringing",
|
|
@@ -228,24 +224,14 @@ describe("processEvent (functional)", () => {
|
|
|
228
224
|
},
|
|
229
225
|
});
|
|
230
226
|
const ctx = createContext({
|
|
231
|
-
config:
|
|
232
|
-
enabled: true,
|
|
233
|
-
provider: "plivo",
|
|
234
|
-
fromNumber: "+15550000000",
|
|
235
|
-
inboundPolicy: "disabled",
|
|
236
|
-
}),
|
|
227
|
+
config: createInboundDisabledConfig(),
|
|
237
228
|
provider,
|
|
238
229
|
});
|
|
239
|
-
const event
|
|
230
|
+
const event = createInboundInitiatedEvent({
|
|
240
231
|
id: "evt-fail",
|
|
241
|
-
type: "call.initiated",
|
|
242
|
-
callId: "prov-fail",
|
|
243
232
|
providerCallId: "prov-fail",
|
|
244
|
-
timestamp: Date.now(),
|
|
245
|
-
direction: "inbound",
|
|
246
233
|
from: "+15553333333",
|
|
247
|
-
|
|
248
|
-
};
|
|
234
|
+
});
|
|
249
235
|
|
|
250
236
|
expect(() => processEvent(ctx, event)).not.toThrow();
|
|
251
237
|
expect(ctx.activeCalls.size).toBe(0);
|
package/src/manager/outbound.ts
CHANGED
|
@@ -51,6 +51,32 @@ type EndCallContext = Pick<
|
|
|
51
51
|
| "maxDurationTimers"
|
|
52
52
|
>;
|
|
53
53
|
|
|
54
|
+
type ConnectedCallContext = Pick<CallManagerContext, "activeCalls" | "provider">;
|
|
55
|
+
|
|
56
|
+
type ConnectedCallLookup =
|
|
57
|
+
| { kind: "error"; error: string }
|
|
58
|
+
| { kind: "ended"; call: CallRecord }
|
|
59
|
+
| {
|
|
60
|
+
kind: "ok";
|
|
61
|
+
call: CallRecord;
|
|
62
|
+
providerCallId: string;
|
|
63
|
+
provider: NonNullable<ConnectedCallContext["provider"]>;
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
function lookupConnectedCall(ctx: ConnectedCallContext, callId: CallId): ConnectedCallLookup {
|
|
67
|
+
const call = ctx.activeCalls.get(callId);
|
|
68
|
+
if (!call) {
|
|
69
|
+
return { kind: "error", error: "Call not found" };
|
|
70
|
+
}
|
|
71
|
+
if (!ctx.provider || !call.providerCallId) {
|
|
72
|
+
return { kind: "error", error: "Call not connected" };
|
|
73
|
+
}
|
|
74
|
+
if (TerminalStates.has(call.state)) {
|
|
75
|
+
return { kind: "ended", call };
|
|
76
|
+
}
|
|
77
|
+
return { kind: "ok", call, providerCallId: call.providerCallId, provider: ctx.provider };
|
|
78
|
+
}
|
|
79
|
+
|
|
54
80
|
export async function initiateCall(
|
|
55
81
|
ctx: InitiateContext,
|
|
56
82
|
to: string,
|
|
@@ -149,26 +175,25 @@ export async function speak(
|
|
|
149
175
|
callId: CallId,
|
|
150
176
|
text: string,
|
|
151
177
|
): Promise<{ success: boolean; error?: string }> {
|
|
152
|
-
const
|
|
153
|
-
if (
|
|
154
|
-
return { success: false, error:
|
|
155
|
-
}
|
|
156
|
-
if (!ctx.provider || !call.providerCallId) {
|
|
157
|
-
return { success: false, error: "Call not connected" };
|
|
178
|
+
const lookup = lookupConnectedCall(ctx, callId);
|
|
179
|
+
if (lookup.kind === "error") {
|
|
180
|
+
return { success: false, error: lookup.error };
|
|
158
181
|
}
|
|
159
|
-
if (
|
|
182
|
+
if (lookup.kind === "ended") {
|
|
160
183
|
return { success: false, error: "Call has ended" };
|
|
161
184
|
}
|
|
185
|
+
const { call, providerCallId, provider } = lookup;
|
|
186
|
+
|
|
162
187
|
try {
|
|
163
188
|
transitionState(call, "speaking");
|
|
164
189
|
persistCallRecord(ctx.storePath, call);
|
|
165
190
|
|
|
166
191
|
addTranscriptEntry(call, "bot", text);
|
|
167
192
|
|
|
168
|
-
const voice =
|
|
169
|
-
await
|
|
193
|
+
const voice = provider.name === "twilio" ? ctx.config.tts?.openai?.voice : undefined;
|
|
194
|
+
await provider.playTts({
|
|
170
195
|
callId,
|
|
171
|
-
providerCallId
|
|
196
|
+
providerCallId,
|
|
172
197
|
text,
|
|
173
198
|
voice,
|
|
174
199
|
});
|
|
@@ -232,16 +257,15 @@ export async function continueCall(
|
|
|
232
257
|
callId: CallId,
|
|
233
258
|
prompt: string,
|
|
234
259
|
): Promise<{ success: boolean; transcript?: string; error?: string }> {
|
|
235
|
-
const
|
|
236
|
-
if (
|
|
237
|
-
return { success: false, error:
|
|
260
|
+
const lookup = lookupConnectedCall(ctx, callId);
|
|
261
|
+
if (lookup.kind === "error") {
|
|
262
|
+
return { success: false, error: lookup.error };
|
|
238
263
|
}
|
|
239
|
-
if (
|
|
240
|
-
return { success: false, error: "Call not connected" };
|
|
241
|
-
}
|
|
242
|
-
if (TerminalStates.has(call.state)) {
|
|
264
|
+
if (lookup.kind === "ended") {
|
|
243
265
|
return { success: false, error: "Call has ended" };
|
|
244
266
|
}
|
|
267
|
+
const { call, providerCallId, provider } = lookup;
|
|
268
|
+
|
|
245
269
|
if (ctx.activeTurnCalls.has(callId) || ctx.transcriptWaiters.has(callId)) {
|
|
246
270
|
return { success: false, error: "Already waiting for transcript" };
|
|
247
271
|
}
|
|
@@ -256,13 +280,13 @@ export async function continueCall(
|
|
|
256
280
|
persistCallRecord(ctx.storePath, call);
|
|
257
281
|
|
|
258
282
|
const listenStartedAt = Date.now();
|
|
259
|
-
await
|
|
283
|
+
await provider.startListening({ callId, providerCallId });
|
|
260
284
|
|
|
261
285
|
const transcript = await waitForFinalTranscript(ctx, callId);
|
|
262
286
|
const transcriptReceivedAt = Date.now();
|
|
263
287
|
|
|
264
288
|
// Best-effort: stop listening after final transcript.
|
|
265
|
-
await
|
|
289
|
+
await provider.stopListening({ callId, providerCallId });
|
|
266
290
|
|
|
267
291
|
const lastTurnLatencyMs = transcriptReceivedAt - turnStartedAt;
|
|
268
292
|
const lastTurnListenWaitMs = transcriptReceivedAt - listenStartedAt;
|
|
@@ -302,21 +326,19 @@ export async function endCall(
|
|
|
302
326
|
ctx: EndCallContext,
|
|
303
327
|
callId: CallId,
|
|
304
328
|
): Promise<{ success: boolean; error?: string }> {
|
|
305
|
-
const
|
|
306
|
-
if (
|
|
307
|
-
return { success: false, error:
|
|
329
|
+
const lookup = lookupConnectedCall(ctx, callId);
|
|
330
|
+
if (lookup.kind === "error") {
|
|
331
|
+
return { success: false, error: lookup.error };
|
|
308
332
|
}
|
|
309
|
-
if (
|
|
310
|
-
return { success: false, error: "Call not connected" };
|
|
311
|
-
}
|
|
312
|
-
if (TerminalStates.has(call.state)) {
|
|
333
|
+
if (lookup.kind === "ended") {
|
|
313
334
|
return { success: true };
|
|
314
335
|
}
|
|
336
|
+
const { call, providerCallId, provider } = lookup;
|
|
315
337
|
|
|
316
338
|
try {
|
|
317
|
-
await
|
|
339
|
+
await provider.hangupCall({
|
|
318
340
|
callId,
|
|
319
|
-
providerCallId
|
|
341
|
+
providerCallId,
|
|
320
342
|
reason: "hangup-bot",
|
|
321
343
|
});
|
|
322
344
|
|
|
@@ -329,9 +351,7 @@ export async function endCall(
|
|
|
329
351
|
rejectTranscriptWaiter(ctx, callId, "Call ended: hangup-bot");
|
|
330
352
|
|
|
331
353
|
ctx.activeCalls.delete(callId);
|
|
332
|
-
|
|
333
|
-
ctx.providerCallIdMap.delete(call.providerCallId);
|
|
334
|
-
}
|
|
354
|
+
ctx.providerCallIdMap.delete(providerCallId);
|
|
335
355
|
|
|
336
356
|
return { success: true };
|
|
337
357
|
} catch (err) {
|
package/src/manager.test.ts
CHANGED
|
@@ -46,17 +46,44 @@ class FakeProvider implements VoiceCallProvider {
|
|
|
46
46
|
}
|
|
47
47
|
}
|
|
48
48
|
|
|
49
|
+
let storeSeq = 0;
|
|
50
|
+
|
|
51
|
+
function createTestStorePath(): string {
|
|
52
|
+
storeSeq += 1;
|
|
53
|
+
return path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}-${storeSeq}`);
|
|
54
|
+
}
|
|
55
|
+
|
|
56
|
+
function createManagerHarness(
|
|
57
|
+
configOverrides: Record<string, unknown> = {},
|
|
58
|
+
provider = new FakeProvider(),
|
|
59
|
+
): {
|
|
60
|
+
manager: CallManager;
|
|
61
|
+
provider: FakeProvider;
|
|
62
|
+
} {
|
|
63
|
+
const config = VoiceCallConfigSchema.parse({
|
|
64
|
+
enabled: true,
|
|
65
|
+
provider: "plivo",
|
|
66
|
+
fromNumber: "+15550000000",
|
|
67
|
+
...configOverrides,
|
|
68
|
+
});
|
|
69
|
+
const manager = new CallManager(config, createTestStorePath());
|
|
70
|
+
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
71
|
+
return { manager, provider };
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
function markCallAnswered(manager: CallManager, callId: string, eventId: string): void {
|
|
75
|
+
manager.processEvent({
|
|
76
|
+
id: eventId,
|
|
77
|
+
type: "call.answered",
|
|
78
|
+
callId,
|
|
79
|
+
providerCallId: "request-uuid",
|
|
80
|
+
timestamp: Date.now(),
|
|
81
|
+
});
|
|
82
|
+
}
|
|
83
|
+
|
|
49
84
|
describe("CallManager", () => {
|
|
50
85
|
it("upgrades providerCallId mapping when provider ID changes", async () => {
|
|
51
|
-
const
|
|
52
|
-
enabled: true,
|
|
53
|
-
provider: "plivo",
|
|
54
|
-
fromNumber: "+15550000000",
|
|
55
|
-
});
|
|
56
|
-
|
|
57
|
-
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
58
|
-
const manager = new CallManager(config, storePath);
|
|
59
|
-
manager.initialize(new FakeProvider(), "https://example.com/voice/webhook");
|
|
86
|
+
const { manager } = createManagerHarness();
|
|
60
87
|
|
|
61
88
|
const { callId, success, error } = await manager.initiateCall("+15550000001");
|
|
62
89
|
expect(success).toBe(true);
|
|
@@ -81,16 +108,7 @@ describe("CallManager", () => {
|
|
|
81
108
|
});
|
|
82
109
|
|
|
83
110
|
it("speaks initial message on answered for notify mode (non-Twilio)", async () => {
|
|
84
|
-
const
|
|
85
|
-
enabled: true,
|
|
86
|
-
provider: "plivo",
|
|
87
|
-
fromNumber: "+15550000000",
|
|
88
|
-
});
|
|
89
|
-
|
|
90
|
-
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
91
|
-
const provider = new FakeProvider();
|
|
92
|
-
const manager = new CallManager(config, storePath);
|
|
93
|
-
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
111
|
+
const { manager, provider } = createManagerHarness();
|
|
94
112
|
|
|
95
113
|
const { callId, success } = await manager.initiateCall("+15550000002", undefined, {
|
|
96
114
|
message: "Hello there",
|
|
@@ -113,19 +131,11 @@ describe("CallManager", () => {
|
|
|
113
131
|
});
|
|
114
132
|
|
|
115
133
|
it("rejects inbound calls with missing caller ID when allowlist enabled", () => {
|
|
116
|
-
const
|
|
117
|
-
enabled: true,
|
|
118
|
-
provider: "plivo",
|
|
119
|
-
fromNumber: "+15550000000",
|
|
134
|
+
const { manager, provider } = createManagerHarness({
|
|
120
135
|
inboundPolicy: "allowlist",
|
|
121
136
|
allowFrom: ["+15550001234"],
|
|
122
137
|
});
|
|
123
138
|
|
|
124
|
-
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
125
|
-
const provider = new FakeProvider();
|
|
126
|
-
const manager = new CallManager(config, storePath);
|
|
127
|
-
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
128
|
-
|
|
129
139
|
manager.processEvent({
|
|
130
140
|
id: "evt-allowlist-missing",
|
|
131
141
|
type: "call.initiated",
|
|
@@ -142,19 +152,11 @@ describe("CallManager", () => {
|
|
|
142
152
|
});
|
|
143
153
|
|
|
144
154
|
it("rejects inbound calls with anonymous caller ID when allowlist enabled", () => {
|
|
145
|
-
const
|
|
146
|
-
enabled: true,
|
|
147
|
-
provider: "plivo",
|
|
148
|
-
fromNumber: "+15550000000",
|
|
155
|
+
const { manager, provider } = createManagerHarness({
|
|
149
156
|
inboundPolicy: "allowlist",
|
|
150
157
|
allowFrom: ["+15550001234"],
|
|
151
158
|
});
|
|
152
159
|
|
|
153
|
-
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
154
|
-
const provider = new FakeProvider();
|
|
155
|
-
const manager = new CallManager(config, storePath);
|
|
156
|
-
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
157
|
-
|
|
158
160
|
manager.processEvent({
|
|
159
161
|
id: "evt-allowlist-anon",
|
|
160
162
|
type: "call.initiated",
|
|
@@ -172,19 +174,11 @@ describe("CallManager", () => {
|
|
|
172
174
|
});
|
|
173
175
|
|
|
174
176
|
it("rejects inbound calls that only match allowlist suffixes", () => {
|
|
175
|
-
const
|
|
176
|
-
enabled: true,
|
|
177
|
-
provider: "plivo",
|
|
178
|
-
fromNumber: "+15550000000",
|
|
177
|
+
const { manager, provider } = createManagerHarness({
|
|
179
178
|
inboundPolicy: "allowlist",
|
|
180
179
|
allowFrom: ["+15550001234"],
|
|
181
180
|
});
|
|
182
181
|
|
|
183
|
-
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
184
|
-
const provider = new FakeProvider();
|
|
185
|
-
const manager = new CallManager(config, storePath);
|
|
186
|
-
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
187
|
-
|
|
188
182
|
manager.processEvent({
|
|
189
183
|
id: "evt-allowlist-suffix",
|
|
190
184
|
type: "call.initiated",
|
|
@@ -202,18 +196,10 @@ describe("CallManager", () => {
|
|
|
202
196
|
});
|
|
203
197
|
|
|
204
198
|
it("rejects duplicate inbound events with a single hangup call", () => {
|
|
205
|
-
const
|
|
206
|
-
enabled: true,
|
|
207
|
-
provider: "plivo",
|
|
208
|
-
fromNumber: "+15550000000",
|
|
199
|
+
const { manager, provider } = createManagerHarness({
|
|
209
200
|
inboundPolicy: "disabled",
|
|
210
201
|
});
|
|
211
202
|
|
|
212
|
-
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
213
|
-
const provider = new FakeProvider();
|
|
214
|
-
const manager = new CallManager(config, storePath);
|
|
215
|
-
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
216
|
-
|
|
217
203
|
manager.processEvent({
|
|
218
204
|
id: "evt-reject-init",
|
|
219
205
|
type: "call.initiated",
|
|
@@ -242,18 +228,11 @@ describe("CallManager", () => {
|
|
|
242
228
|
});
|
|
243
229
|
|
|
244
230
|
it("accepts inbound calls that exactly match the allowlist", () => {
|
|
245
|
-
const
|
|
246
|
-
enabled: true,
|
|
247
|
-
provider: "plivo",
|
|
248
|
-
fromNumber: "+15550000000",
|
|
231
|
+
const { manager } = createManagerHarness({
|
|
249
232
|
inboundPolicy: "allowlist",
|
|
250
233
|
allowFrom: ["+15550001234"],
|
|
251
234
|
});
|
|
252
235
|
|
|
253
|
-
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
254
|
-
const manager = new CallManager(config, storePath);
|
|
255
|
-
manager.initialize(new FakeProvider(), "https://example.com/voice/webhook");
|
|
256
|
-
|
|
257
236
|
manager.processEvent({
|
|
258
237
|
id: "evt-allowlist-exact",
|
|
259
238
|
type: "call.initiated",
|
|
@@ -269,28 +248,14 @@ describe("CallManager", () => {
|
|
|
269
248
|
});
|
|
270
249
|
|
|
271
250
|
it("completes a closed-loop turn without live audio", async () => {
|
|
272
|
-
const
|
|
273
|
-
enabled: true,
|
|
274
|
-
provider: "plivo",
|
|
275
|
-
fromNumber: "+15550000000",
|
|
251
|
+
const { manager, provider } = createManagerHarness({
|
|
276
252
|
transcriptTimeoutMs: 5000,
|
|
277
253
|
});
|
|
278
254
|
|
|
279
|
-
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
280
|
-
const provider = new FakeProvider();
|
|
281
|
-
const manager = new CallManager(config, storePath);
|
|
282
|
-
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
283
|
-
|
|
284
255
|
const started = await manager.initiateCall("+15550000003");
|
|
285
256
|
expect(started.success).toBe(true);
|
|
286
257
|
|
|
287
|
-
manager.
|
|
288
|
-
id: "evt-closed-loop-answered",
|
|
289
|
-
type: "call.answered",
|
|
290
|
-
callId: started.callId,
|
|
291
|
-
providerCallId: "request-uuid",
|
|
292
|
-
timestamp: Date.now(),
|
|
293
|
-
});
|
|
258
|
+
markCallAnswered(manager, started.callId, "evt-closed-loop-answered");
|
|
294
259
|
|
|
295
260
|
const turnPromise = manager.continueCall(started.callId, "How can I help?");
|
|
296
261
|
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
@@ -323,28 +288,14 @@ describe("CallManager", () => {
|
|
|
323
288
|
});
|
|
324
289
|
|
|
325
290
|
it("rejects overlapping continueCall requests for the same call", async () => {
|
|
326
|
-
const
|
|
327
|
-
enabled: true,
|
|
328
|
-
provider: "plivo",
|
|
329
|
-
fromNumber: "+15550000000",
|
|
291
|
+
const { manager, provider } = createManagerHarness({
|
|
330
292
|
transcriptTimeoutMs: 5000,
|
|
331
293
|
});
|
|
332
294
|
|
|
333
|
-
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
334
|
-
const provider = new FakeProvider();
|
|
335
|
-
const manager = new CallManager(config, storePath);
|
|
336
|
-
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
337
|
-
|
|
338
295
|
const started = await manager.initiateCall("+15550000004");
|
|
339
296
|
expect(started.success).toBe(true);
|
|
340
297
|
|
|
341
|
-
manager.
|
|
342
|
-
id: "evt-overlap-answered",
|
|
343
|
-
type: "call.answered",
|
|
344
|
-
callId: started.callId,
|
|
345
|
-
providerCallId: "request-uuid",
|
|
346
|
-
timestamp: Date.now(),
|
|
347
|
-
});
|
|
298
|
+
markCallAnswered(manager, started.callId, "evt-overlap-answered");
|
|
348
299
|
|
|
349
300
|
const first = manager.continueCall(started.callId, "First prompt");
|
|
350
301
|
const second = await manager.continueCall(started.callId, "Second prompt");
|
|
@@ -369,28 +320,14 @@ describe("CallManager", () => {
|
|
|
369
320
|
});
|
|
370
321
|
|
|
371
322
|
it("tracks latency metadata across multiple closed-loop turns", async () => {
|
|
372
|
-
const
|
|
373
|
-
enabled: true,
|
|
374
|
-
provider: "plivo",
|
|
375
|
-
fromNumber: "+15550000000",
|
|
323
|
+
const { manager, provider } = createManagerHarness({
|
|
376
324
|
transcriptTimeoutMs: 5000,
|
|
377
325
|
});
|
|
378
326
|
|
|
379
|
-
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
380
|
-
const provider = new FakeProvider();
|
|
381
|
-
const manager = new CallManager(config, storePath);
|
|
382
|
-
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
383
|
-
|
|
384
327
|
const started = await manager.initiateCall("+15550000005");
|
|
385
328
|
expect(started.success).toBe(true);
|
|
386
329
|
|
|
387
|
-
manager.
|
|
388
|
-
id: "evt-multi-answered",
|
|
389
|
-
type: "call.answered",
|
|
390
|
-
callId: started.callId,
|
|
391
|
-
providerCallId: "request-uuid",
|
|
392
|
-
timestamp: Date.now(),
|
|
393
|
-
});
|
|
330
|
+
markCallAnswered(manager, started.callId, "evt-multi-answered");
|
|
394
331
|
|
|
395
332
|
const firstTurn = manager.continueCall(started.callId, "First question");
|
|
396
333
|
await new Promise((resolve) => setTimeout(resolve, 0));
|
|
@@ -436,28 +373,14 @@ describe("CallManager", () => {
|
|
|
436
373
|
});
|
|
437
374
|
|
|
438
375
|
it("handles repeated closed-loop turns without waiter churn", async () => {
|
|
439
|
-
const
|
|
440
|
-
enabled: true,
|
|
441
|
-
provider: "plivo",
|
|
442
|
-
fromNumber: "+15550000000",
|
|
376
|
+
const { manager, provider } = createManagerHarness({
|
|
443
377
|
transcriptTimeoutMs: 5000,
|
|
444
378
|
});
|
|
445
379
|
|
|
446
|
-
const storePath = path.join(os.tmpdir(), `openclaw-voice-call-test-${Date.now()}`);
|
|
447
|
-
const provider = new FakeProvider();
|
|
448
|
-
const manager = new CallManager(config, storePath);
|
|
449
|
-
manager.initialize(provider, "https://example.com/voice/webhook");
|
|
450
|
-
|
|
451
380
|
const started = await manager.initiateCall("+15550000006");
|
|
452
381
|
expect(started.success).toBe(true);
|
|
453
382
|
|
|
454
|
-
manager.
|
|
455
|
-
id: "evt-loop-answered",
|
|
456
|
-
type: "call.answered",
|
|
457
|
-
callId: started.callId,
|
|
458
|
-
providerCallId: "request-uuid",
|
|
459
|
-
timestamp: Date.now(),
|
|
460
|
-
});
|
|
383
|
+
markCallAnswered(manager, started.callId, "evt-loop-answered");
|
|
461
384
|
|
|
462
385
|
for (let i = 1; i <= 5; i++) {
|
|
463
386
|
const turnPromise = manager.continueCall(started.callId, `Prompt ${i}`);
|
package/src/media-stream.test.ts
CHANGED
|
@@ -1,4 +1,7 @@
|
|
|
1
|
+
import { once } from "node:events";
|
|
2
|
+
import http from "node:http";
|
|
1
3
|
import { describe, expect, it } from "vitest";
|
|
4
|
+
import { WebSocket } from "ws";
|
|
2
5
|
import { MediaStreamHandler } from "./media-stream.js";
|
|
3
6
|
import type {
|
|
4
7
|
OpenAIRealtimeSTTProvider,
|
|
@@ -34,6 +37,70 @@ const waitForAbort = (signal: AbortSignal): Promise<void> =>
|
|
|
34
37
|
signal.addEventListener("abort", () => resolve(), { once: true });
|
|
35
38
|
});
|
|
36
39
|
|
|
40
|
+
const withTimeout = async <T>(promise: Promise<T>, timeoutMs = 2000): Promise<T> => {
|
|
41
|
+
let timer: ReturnType<typeof setTimeout> | null = null;
|
|
42
|
+
const timeout = new Promise<never>((_, reject) => {
|
|
43
|
+
timer = setTimeout(() => reject(new Error(`Timed out after ${timeoutMs}ms`)), timeoutMs);
|
|
44
|
+
});
|
|
45
|
+
|
|
46
|
+
try {
|
|
47
|
+
return await Promise.race([promise, timeout]);
|
|
48
|
+
} finally {
|
|
49
|
+
if (timer) {
|
|
50
|
+
clearTimeout(timer);
|
|
51
|
+
}
|
|
52
|
+
}
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
const startWsServer = async (
|
|
56
|
+
handler: MediaStreamHandler,
|
|
57
|
+
): Promise<{
|
|
58
|
+
url: string;
|
|
59
|
+
close: () => Promise<void>;
|
|
60
|
+
}> => {
|
|
61
|
+
const server = http.createServer();
|
|
62
|
+
server.on("upgrade", (request, socket, head) => {
|
|
63
|
+
handler.handleUpgrade(request, socket, head);
|
|
64
|
+
});
|
|
65
|
+
|
|
66
|
+
await new Promise<void>((resolve) => {
|
|
67
|
+
server.listen(0, "127.0.0.1", resolve);
|
|
68
|
+
});
|
|
69
|
+
|
|
70
|
+
const address = server.address();
|
|
71
|
+
if (!address || typeof address === "string") {
|
|
72
|
+
throw new Error("Failed to resolve test server address");
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
return {
|
|
76
|
+
url: `ws://127.0.0.1:${address.port}/voice/stream`,
|
|
77
|
+
close: async () => {
|
|
78
|
+
await new Promise<void>((resolve, reject) => {
|
|
79
|
+
server.close((err) => (err ? reject(err) : resolve()));
|
|
80
|
+
});
|
|
81
|
+
},
|
|
82
|
+
};
|
|
83
|
+
};
|
|
84
|
+
|
|
85
|
+
const connectWs = async (url: string): Promise<WebSocket> => {
|
|
86
|
+
const ws = new WebSocket(url);
|
|
87
|
+
await withTimeout(once(ws, "open") as Promise<[unknown]>);
|
|
88
|
+
return ws;
|
|
89
|
+
};
|
|
90
|
+
|
|
91
|
+
const waitForClose = async (
|
|
92
|
+
ws: WebSocket,
|
|
93
|
+
): Promise<{
|
|
94
|
+
code: number;
|
|
95
|
+
reason: string;
|
|
96
|
+
}> => {
|
|
97
|
+
const [code, reason] = (await withTimeout(once(ws, "close") as Promise<[number, Buffer]>)) ?? [];
|
|
98
|
+
return {
|
|
99
|
+
code,
|
|
100
|
+
reason: Buffer.isBuffer(reason) ? reason.toString() : String(reason || ""),
|
|
101
|
+
};
|
|
102
|
+
};
|
|
103
|
+
|
|
37
104
|
describe("MediaStreamHandler TTS queue", () => {
|
|
38
105
|
it("serializes TTS playback and resolves in order", async () => {
|
|
39
106
|
const handler = new MediaStreamHandler({
|
|
@@ -94,3 +161,111 @@ describe("MediaStreamHandler TTS queue", () => {
|
|
|
94
161
|
expect(queuedRan).toBe(false);
|
|
95
162
|
});
|
|
96
163
|
});
|
|
164
|
+
|
|
165
|
+
/**
 * End-to-end checks for the connection limits of MediaStreamHandler: the
 * pre-start timeout, the pending-connection caps, the total connection cap,
 * and the release of pending state once a `start` frame is accepted.
 */
describe("MediaStreamHandler security hardening", () => {
  it("closes idle pre-start connections after timeout", async () => {
    // Records every shouldAcceptStream invocation so the test can show none happened.
    const acceptCalls: Array<{ callId: string; streamSid: string; token?: string }> = [];
    const handler = new MediaStreamHandler({
      sttProvider: createStubSttProvider(),
      preStartTimeoutMs: 40,
      shouldAcceptStream: (params) => {
        acceptCalls.push(params);
        return true;
      },
    });
    const server = await startWsServer(handler);

    try {
      // Connect and stay silent: no `start` frame is ever sent.
      const socket = await connectWs(server.url);
      const { code, reason } = await waitForClose(socket);

      expect(code).toBe(1008);
      expect(reason).toBe("Start timeout");
      expect(acceptCalls).toEqual([]);
    } finally {
      await server.close();
    }
  });

  it("enforces pending connection limits", async () => {
    // Timeout is long relative to the test so the first socket is still pending
    // when the second one connects.
    const handler = new MediaStreamHandler({
      sttProvider: createStubSttProvider(),
      preStartTimeoutMs: 5_000,
      maxPendingConnections: 1,
      maxPendingConnectionsPerIp: 1,
    });
    const server = await startWsServer(handler);

    try {
      const admitted = await connectWs(server.url);
      const overflow = await connectWs(server.url);
      const { code, reason } = await waitForClose(overflow);

      expect(code).toBe(1013);
      expect(reason).toContain("Too many pending");
      expect(admitted.readyState).toBe(WebSocket.OPEN);

      admitted.close();
      await waitForClose(admitted);
    } finally {
      await server.close();
    }
  });

  it("rejects upgrades when max connection cap is reached", async () => {
    // Pending caps are set well above the total cap so only maxConnections can trigger.
    const handler = new MediaStreamHandler({
      sttProvider: createStubSttProvider(),
      preStartTimeoutMs: 5_000,
      maxConnections: 1,
      maxPendingConnections: 10,
      maxPendingConnectionsPerIp: 10,
    });
    const server = await startWsServer(handler);

    try {
      const first = await connectWs(server.url);

      // The second client is expected to fail during the HTTP upgrade, which
      // the WebSocket client reports through its "error" event.
      const upgradeFailure = new Promise<Error>((resolve) => {
        const ws = new WebSocket(server.url);
        ws.once("error", (err) => resolve(err as Error));
      });
      const secondError = await withTimeout(upgradeFailure);

      expect(secondError.message).toContain("Unexpected server response: 503");

      first.close();
      await waitForClose(first);
    } finally {
      await server.close();
    }
  });

  it("clears pending state after valid start", async () => {
    const handler = new MediaStreamHandler({
      sttProvider: createStubSttProvider(),
      preStartTimeoutMs: 40,
      shouldAcceptStream: () => true,
    });
    const server = await startWsServer(handler);

    try {
      const ws = await connectWs(server.url);
      const startFrame = {
        event: "start",
        streamSid: "MZ123",
        start: { callSid: "CA123", customParameters: { token: "token-123" } },
      };
      ws.send(JSON.stringify(startFrame));

      // Wait past the 40ms pre-start window; the socket must still be open.
      await new Promise((resolve) => setTimeout(resolve, 80));
      expect(ws.readyState).toBe(WebSocket.OPEN);

      ws.close();
      await waitForClose(ws);
    } finally {
      await server.close();
    }
  });
});
|
package/src/media-stream.ts
CHANGED
|
@@ -21,6 +21,14 @@ import type {
|
|
|
21
21
|
export interface MediaStreamConfig {
|
|
22
22
|
/** STT provider for transcription */
|
|
23
23
|
sttProvider: OpenAIRealtimeSTTProvider;
|
|
24
|
+
/** Close sockets that never send a valid `start` frame within this window. */
|
|
25
|
+
preStartTimeoutMs?: number;
|
|
26
|
+
/** Max concurrent pre-start sockets. */
|
|
27
|
+
maxPendingConnections?: number;
|
|
28
|
+
/** Max concurrent pre-start sockets from a single source IP. */
|
|
29
|
+
maxPendingConnectionsPerIp?: number;
|
|
30
|
+
/** Max total open sockets (pending + active sessions). */
|
|
31
|
+
maxConnections?: number;
|
|
24
32
|
/** Validate whether to accept a media stream for the given call ID */
|
|
25
33
|
shouldAcceptStream?: (params: { callId: string; streamSid: string; token?: string }) => boolean;
|
|
26
34
|
/** Callback when transcript is received */
|
|
@@ -52,6 +60,16 @@ type TtsQueueEntry = {
|
|
|
52
60
|
reject: (error: unknown) => void;
|
|
53
61
|
};
|
|
54
62
|
|
|
63
|
+
/** Bookkeeping kept for a socket that has not yet had a `start` frame accepted. */
type PendingConnection = {
  /** Remote address the socket is counted under for the per-IP limit. */
  ip: string;
  /** Handle of the pre-start timer, kept so it can be cancelled. */
  timeout: ReturnType<typeof setTimeout>;
};

// Fallback limits used when the matching MediaStreamConfig option is not set.
const DEFAULT_PRE_START_TIMEOUT_MS = 5_000;
const DEFAULT_MAX_PENDING_CONNECTIONS = 32;
const DEFAULT_MAX_PENDING_CONNECTIONS_PER_IP = 4;
const DEFAULT_MAX_CONNECTIONS = 128;
|
|
72
|
+
|
|
55
73
|
/**
|
|
56
74
|
* Manages WebSocket connections for Twilio media streams.
|
|
57
75
|
*/
|
|
@@ -59,6 +77,14 @@ export class MediaStreamHandler {
|
|
|
59
77
|
private wss: WebSocketServer | null = null;
|
|
60
78
|
private sessions = new Map<string, StreamSession>();
|
|
61
79
|
private config: MediaStreamConfig;
|
|
80
|
+
/** Pending sockets that have upgraded but not yet sent an accepted `start` frame. */
|
|
81
|
+
private pendingConnections = new Map<WebSocket, PendingConnection>();
|
|
82
|
+
/** Pending socket count per remote IP for pre-auth throttling. */
|
|
83
|
+
private pendingByIp = new Map<string, number>();
|
|
84
|
+
private preStartTimeoutMs: number;
|
|
85
|
+
private maxPendingConnections: number;
|
|
86
|
+
private maxPendingConnectionsPerIp: number;
|
|
87
|
+
private maxConnections: number;
|
|
62
88
|
/** TTS playback queues per stream (serialize audio to prevent overlap) */
|
|
63
89
|
private ttsQueues = new Map<string, TtsQueueEntry[]>();
|
|
64
90
|
/** Whether TTS is currently playing per stream */
|
|
@@ -68,6 +94,11 @@ export class MediaStreamHandler {
|
|
|
68
94
|
|
|
69
95
|
/**
 * Stores the supplied config and resolves the four connection limits,
 * substituting the module defaults for any option that is null or undefined.
 */
constructor(config: MediaStreamConfig) {
  this.config = config;

  const {
    preStartTimeoutMs,
    maxPendingConnections,
    maxPendingConnectionsPerIp,
    maxConnections,
  } = config;

  this.preStartTimeoutMs = preStartTimeoutMs ?? DEFAULT_PRE_START_TIMEOUT_MS;
  this.maxPendingConnections = maxPendingConnections ?? DEFAULT_MAX_PENDING_CONNECTIONS;
  this.maxPendingConnectionsPerIp =
    maxPendingConnectionsPerIp ?? DEFAULT_MAX_PENDING_CONNECTIONS_PER_IP;
  this.maxConnections = maxConnections ?? DEFAULT_MAX_CONNECTIONS;
}
|
|
72
103
|
|
|
73
104
|
/**
|
|
@@ -79,6 +110,12 @@ export class MediaStreamHandler {
|
|
|
79
110
|
this.wss.on("connection", (ws, req) => this.handleConnection(ws, req));
|
|
80
111
|
}
|
|
81
112
|
|
|
113
|
+
const currentConnections = this.wss.clients.size;
|
|
114
|
+
if (currentConnections >= this.maxConnections) {
|
|
115
|
+
this.rejectUpgrade(socket, 503, "Too many media stream connections");
|
|
116
|
+
return;
|
|
117
|
+
}
|
|
118
|
+
|
|
82
119
|
this.wss.handleUpgrade(request, socket, head, (ws) => {
|
|
83
120
|
this.wss?.emit("connection", ws, request);
|
|
84
121
|
});
|
|
@@ -90,6 +127,12 @@ export class MediaStreamHandler {
|
|
|
90
127
|
private async handleConnection(ws: WebSocket, _request: IncomingMessage): Promise<void> {
|
|
91
128
|
let session: StreamSession | null = null;
|
|
92
129
|
const streamToken = this.getStreamToken(_request);
|
|
130
|
+
const ip = this.getClientIp(_request);
|
|
131
|
+
|
|
132
|
+
if (!this.registerPendingConnection(ws, ip)) {
|
|
133
|
+
ws.close(1013, "Too many pending media stream connections");
|
|
134
|
+
return;
|
|
135
|
+
}
|
|
93
136
|
|
|
94
137
|
ws.on("message", async (data: Buffer) => {
|
|
95
138
|
try {
|
|
@@ -102,6 +145,9 @@ export class MediaStreamHandler {
|
|
|
102
145
|
|
|
103
146
|
case "start":
|
|
104
147
|
session = await this.handleStart(ws, message, streamToken);
|
|
148
|
+
if (session) {
|
|
149
|
+
this.clearPendingConnection(ws);
|
|
150
|
+
}
|
|
105
151
|
break;
|
|
106
152
|
|
|
107
153
|
case "media":
|
|
@@ -125,6 +171,7 @@ export class MediaStreamHandler {
|
|
|
125
171
|
});
|
|
126
172
|
|
|
127
173
|
ws.on("close", () => {
|
|
174
|
+
this.clearPendingConnection(ws);
|
|
128
175
|
if (session) {
|
|
129
176
|
this.handleStop(session);
|
|
130
177
|
}
|
|
@@ -226,6 +273,69 @@ export class MediaStreamHandler {
|
|
|
226
273
|
}
|
|
227
274
|
}
|
|
228
275
|
|
|
276
|
+
/**
 * Returns the remote address of the socket that carried the request, or the
 * literal "unknown" when that address is missing or empty.
 */
private getClientIp(request: IncomingMessage): string {
  const { remoteAddress } = request.socket;
  return remoteAddress ? remoteAddress : "unknown";
}
|
|
279
|
+
|
|
280
|
+
/**
 * Tries to admit `ws` as a pending connection, i.e. one that has not yet had a
 * `start` frame accepted.
 *
 * On success the socket is tracked, the per-IP counter is incremented, and a
 * timer is armed that closes the socket with code 1008 if it is still pending
 * after `preStartTimeoutMs`.
 *
 * @param ws - The freshly upgraded socket.
 * @param ip - Key used for the per-IP pending counter.
 * @returns `true` when the socket was registered; `false` when the global or
 *   per-IP pending limit is already reached. This method does not close the
 *   socket itself in that case.
 */
private registerPendingConnection(ws: WebSocket, ip: string): boolean {
  const globalLimitReached = this.pendingConnections.size >= this.maxPendingConnections;
  if (globalLimitReached) {
    console.warn("[MediaStream] Rejecting connection: pending connection limit reached");
    return false;
  }

  const ipPendingCount = this.pendingByIp.get(ip) ?? 0;
  if (ipPendingCount >= this.maxPendingConnectionsPerIp) {
    console.warn(`[MediaStream] Rejecting connection: pending per-IP limit reached (${ip})`);
    return false;
  }

  const onStartTimeout = (): void => {
    // Only act if the socket is still waiting; otherwise it was already cleared.
    if (this.pendingConnections.has(ws)) {
      console.warn(
        `[MediaStream] Closing pre-start idle connection after ${this.preStartTimeoutMs}ms (${ip})`,
      );
      ws.close(1008, "Start timeout");
    }
  };
  const timeout = setTimeout(onStartTimeout, this.preStartTimeoutMs);

  // Do not let this timer alone keep the process alive.
  timeout.unref?.();

  this.pendingByIp.set(ip, ipPendingCount + 1);
  this.pendingConnections.set(ws, { ip, timeout });
  return true;
}
|
|
307
|
+
|
|
308
|
+
/**
 * Stops tracking `ws` as a pending connection: cancels its pre-start timer,
 * removes it from the pending map, and decrements the counter for its IP
 * (deleting the counter entry once it would drop to zero).
 *
 * Does nothing when `ws` is not currently pending, so it can be called more
 * than once for the same socket.
 */
private clearPendingConnection(ws: WebSocket): void {
  const entry = this.pendingConnections.get(ws);
  if (entry) {
    clearTimeout(entry.timeout);
    this.pendingConnections.delete(ws);

    const remainingForIp = (this.pendingByIp.get(entry.ip) ?? 0) - 1;
    if (remainingForIp > 0) {
      this.pendingByIp.set(entry.ip, remainingForIp);
    } else {
      this.pendingByIp.delete(entry.ip);
    }
  }
}
|
|
324
|
+
|
|
325
|
+
/**
 * Refuses a WebSocket upgrade by answering the raw socket with a plain-text
 * HTTP error response and then closing the connection.
 *
 * The response is passed to `socket.end()` and the socket is destroyed only
 * after `finish` signals that the data has been flushed. Destroying right
 * after `write()` can discard output that is still queued in the stream, in
 * which case the client sees a reset instead of the status code.
 *
 * @param socket - The socket handed to the HTTP server's `upgrade` listener.
 * @param statusCode - HTTP status to send (429 or 503).
 * @param message - Human-readable reason, sent as the response body followed
 *   by a newline.
 */
private rejectUpgrade(socket: Duplex, statusCode: 429 | 503, message: string): void {
  // Nothing can be sent on a socket that is already closed for writing.
  if (socket.destroyed || !socket.writable) {
    socket.destroy();
    return;
  }

  const statusText = statusCode === 429 ? "Too Many Requests" : "Service Unavailable";
  const body = `${message}\n`;
  const head = [
    `HTTP/1.1 ${statusCode} ${statusText}`,
    "Connection: close",
    "Content-Type: text/plain; charset=utf-8",
    `Content-Length: ${Buffer.byteLength(body)}`,
  ].join("\r\n");

  // An "error" emitted while flushing would be thrown as an unhandled event
  // if nothing else is listening on the socket, so handle it here.
  socket.on("error", () => socket.destroy());
  socket.once("finish", () => socket.destroy());
  socket.end(`${head}\r\n\r\n${body}`);
}
|
|
338
|
+
|
|
229
339
|
/**
|
|
230
340
|
* Get an active session with an open WebSocket, or undefined if unavailable.
|
|
231
341
|
*/
|
package/src/webhook.ts
CHANGED
|
@@ -77,6 +77,10 @@ export class VoiceCallWebhookServer {
|
|
|
77
77
|
|
|
78
78
|
const streamConfig: MediaStreamConfig = {
|
|
79
79
|
sttProvider,
|
|
80
|
+
preStartTimeoutMs: this.config.streaming?.preStartTimeoutMs,
|
|
81
|
+
maxPendingConnections: this.config.streaming?.maxPendingConnections,
|
|
82
|
+
maxPendingConnectionsPerIp: this.config.streaming?.maxPendingConnectionsPerIp,
|
|
83
|
+
maxConnections: this.config.streaming?.maxConnections,
|
|
80
84
|
shouldAcceptStream: ({ callId, token }) => {
|
|
81
85
|
const call = this.manager.getCallByProviderCallId(callId);
|
|
82
86
|
if (!call) {
|
|
@@ -192,9 +196,8 @@ export class VoiceCallWebhookServer {
|
|
|
192
196
|
// Handle WebSocket upgrades for media streams
|
|
193
197
|
if (this.mediaStreamHandler) {
|
|
194
198
|
this.server.on("upgrade", (request, socket, head) => {
|
|
195
|
-
const
|
|
196
|
-
|
|
197
|
-
if (url.pathname === streamPath) {
|
|
199
|
+
const path = this.getUpgradePathname(request);
|
|
200
|
+
if (path === streamPath) {
|
|
198
201
|
console.log("[voice-call] WebSocket upgrade for media stream");
|
|
199
202
|
this.mediaStreamHandler?.handleUpgrade(request, socket, head);
|
|
200
203
|
} else {
|
|
@@ -269,6 +272,15 @@ export class VoiceCallWebhookServer {
|
|
|
269
272
|
});
|
|
270
273
|
}
|
|
271
274
|
|
|
275
|
+
/**
 * Extracts the pathname of an upgrade request.
 *
 * The request URL (or "/" when it is missing or empty) is resolved against
 * `http://<Host header>`, with "localhost" standing in for a missing or empty
 * Host header.
 *
 * @returns The parsed pathname, or `null` when the URL cannot be parsed.
 */
private getUpgradePathname(request: http.IncomingMessage): string | null {
  try {
    const base = `http://${request.headers.host || "localhost"}`;
    const { pathname } = new URL(request.url || "/", base);
    return pathname;
  } catch {
    return null;
  }
}
|
|
283
|
+
|
|
272
284
|
/**
|
|
273
285
|
* Handle incoming HTTP request.
|
|
274
286
|
*/
|