@openclaw/voice-call 2026.2.21 → 2026.2.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +6 -0
- package/README.md +13 -0
- package/package.json +1 -1
- package/src/cli.ts +29 -16
- package/src/config.test.ts +4 -0
- package/src/config.ts +15 -0
- package/src/manager/context.ts +1 -0
- package/src/manager/events.test.ts +100 -71
- package/src/manager/events.ts +17 -4
- package/src/manager/outbound.ts +76 -36
- package/src/manager/timers.ts +13 -4
- package/src/manager.test.ts +109 -127
- package/src/media-stream.test.ts +175 -0
- package/src/media-stream.ts +110 -0
- package/src/providers/plivo.ts +84 -39
- package/src/providers/twilio/webhook.ts +1 -0
- package/src/providers/twilio.test.ts +34 -0
- package/src/providers/twilio.ts +54 -3
- package/src/types.ts +8 -0
- package/src/webhook-security.test.ts +76 -0
- package/src/webhook-security.ts +100 -17
- package/src/webhook.test.ts +51 -1
- package/src/webhook.ts +24 -8
package/CHANGELOG.md
CHANGED
package/README.md
CHANGED
|
@@ -76,6 +76,10 @@ Put under `plugins.entries.voice-call.config`:
|
|
|
76
76
|
streaming: {
|
|
77
77
|
enabled: true,
|
|
78
78
|
streamPath: "/voice/stream",
|
|
79
|
+
preStartTimeoutMs: 5000,
|
|
80
|
+
maxPendingConnections: 32,
|
|
81
|
+
maxPendingConnectionsPerIp: 4,
|
|
82
|
+
maxConnections: 128,
|
|
79
83
|
},
|
|
80
84
|
}
|
|
81
85
|
```
|
|
@@ -87,6 +91,13 @@ Notes:
|
|
|
87
91
|
- Telnyx requires `telnyx.publicKey` (or `TELNYX_PUBLIC_KEY`) unless `skipSignatureVerification` is true.
|
|
88
92
|
- `tunnel.allowNgrokFreeTierLoopbackBypass: true` allows Twilio webhooks with invalid signatures **only** when `tunnel.provider="ngrok"` and `serve.bind` is loopback (ngrok local agent). Use for local dev only.
|
|
89
93
|
|
|
94
|
+
Streaming security defaults:
|
|
95
|
+
|
|
96
|
+
- `streaming.preStartTimeoutMs` closes sockets that never send a valid `start` frame.
|
|
97
|
+
- `streaming.maxPendingConnections` caps total unauthenticated pre-start sockets.
|
|
98
|
+
- `streaming.maxPendingConnectionsPerIp` caps unauthenticated pre-start sockets per source IP.
|
|
99
|
+
- `streaming.maxConnections` caps total open media stream sockets (pending + active).
|
|
100
|
+
|
|
90
101
|
## Stale call reaper
|
|
91
102
|
|
|
92
103
|
Use `staleCallReaperSeconds` to end calls that never receive a terminal webhook
|
|
@@ -164,5 +175,7 @@ Actions:
|
|
|
164
175
|
## Notes
|
|
165
176
|
|
|
166
177
|
- Uses webhook signature verification for Twilio/Telnyx/Plivo.
|
|
178
|
+
- Adds replay protection for Twilio and Plivo webhooks (valid duplicate callbacks are ignored safely).
|
|
179
|
+
- Twilio speech turns include a per-turn token so stale/replayed callbacks cannot complete a newer turn.
|
|
167
180
|
- `responseModel` / `responseSystemPrompt` control AI auto-responses.
|
|
168
181
|
- Media streaming requires `ws` and OpenAI Realtime API key.
|
package/package.json
CHANGED
package/src/cli.ts
CHANGED
|
@@ -81,6 +81,27 @@ function summarizeSeries(values: number[]): {
|
|
|
81
81
|
};
|
|
82
82
|
}
|
|
83
83
|
|
|
84
|
+
function resolveCallMode(mode?: string): "notify" | "conversation" | undefined {
|
|
85
|
+
return mode === "notify" || mode === "conversation" ? mode : undefined;
|
|
86
|
+
}
|
|
87
|
+
|
|
88
|
+
async function initiateCallAndPrintId(params: {
|
|
89
|
+
runtime: VoiceCallRuntime;
|
|
90
|
+
to: string;
|
|
91
|
+
message?: string;
|
|
92
|
+
mode?: string;
|
|
93
|
+
}) {
|
|
94
|
+
const result = await params.runtime.manager.initiateCall(params.to, undefined, {
|
|
95
|
+
message: params.message,
|
|
96
|
+
mode: resolveCallMode(params.mode),
|
|
97
|
+
});
|
|
98
|
+
if (!result.success) {
|
|
99
|
+
throw new Error(result.error || "initiate failed");
|
|
100
|
+
}
|
|
101
|
+
// eslint-disable-next-line no-console
|
|
102
|
+
console.log(JSON.stringify({ callId: result.callId }, null, 2));
|
|
103
|
+
}
|
|
104
|
+
|
|
84
105
|
export function registerVoiceCallCli(params: {
|
|
85
106
|
program: Command;
|
|
86
107
|
config: VoiceCallConfig;
|
|
@@ -112,16 +133,12 @@ export function registerVoiceCallCli(params: {
|
|
|
112
133
|
if (!to) {
|
|
113
134
|
throw new Error("Missing --to and no toNumber configured");
|
|
114
135
|
}
|
|
115
|
-
|
|
136
|
+
await initiateCallAndPrintId({
|
|
137
|
+
runtime: rt,
|
|
138
|
+
to,
|
|
116
139
|
message: options.message,
|
|
117
|
-
mode:
|
|
118
|
-
options.mode === "notify" || options.mode === "conversation" ? options.mode : undefined,
|
|
140
|
+
mode: options.mode,
|
|
119
141
|
});
|
|
120
|
-
if (!result.success) {
|
|
121
|
-
throw new Error(result.error || "initiate failed");
|
|
122
|
-
}
|
|
123
|
-
// eslint-disable-next-line no-console
|
|
124
|
-
console.log(JSON.stringify({ callId: result.callId }, null, 2));
|
|
125
142
|
});
|
|
126
143
|
|
|
127
144
|
root
|
|
@@ -136,16 +153,12 @@ export function registerVoiceCallCli(params: {
|
|
|
136
153
|
)
|
|
137
154
|
.action(async (options: { to: string; message?: string; mode?: string }) => {
|
|
138
155
|
const rt = await ensureRuntime();
|
|
139
|
-
|
|
156
|
+
await initiateCallAndPrintId({
|
|
157
|
+
runtime: rt,
|
|
158
|
+
to: options.to,
|
|
140
159
|
message: options.message,
|
|
141
|
-
mode:
|
|
142
|
-
options.mode === "notify" || options.mode === "conversation" ? options.mode : undefined,
|
|
160
|
+
mode: options.mode,
|
|
143
161
|
});
|
|
144
|
-
if (!result.success) {
|
|
145
|
-
throw new Error(result.error || "initiate failed");
|
|
146
|
-
}
|
|
147
|
-
// eslint-disable-next-line no-console
|
|
148
|
-
console.log(JSON.stringify({ callId: result.callId }, null, 2));
|
|
149
162
|
});
|
|
150
163
|
|
|
151
164
|
root
|
package/src/config.test.ts
CHANGED
|
@@ -30,6 +30,10 @@ function createBaseConfig(provider: "telnyx" | "twilio" | "plivo" | "mock"): Voi
|
|
|
30
30
|
silenceDurationMs: 800,
|
|
31
31
|
vadThreshold: 0.5,
|
|
32
32
|
streamPath: "/voice/stream",
|
|
33
|
+
preStartTimeoutMs: 5000,
|
|
34
|
+
maxPendingConnections: 32,
|
|
35
|
+
maxPendingConnectionsPerIp: 4,
|
|
36
|
+
maxConnections: 128,
|
|
33
37
|
},
|
|
34
38
|
skipSignatureVerification: false,
|
|
35
39
|
stt: { provider: "openai", model: "whisper-1" },
|
package/src/config.ts
CHANGED
|
@@ -219,6 +219,17 @@ export const VoiceCallStreamingConfigSchema = z
|
|
|
219
219
|
vadThreshold: z.number().min(0).max(1).default(0.5),
|
|
220
220
|
/** WebSocket path for media stream connections */
|
|
221
221
|
streamPath: z.string().min(1).default("/voice/stream"),
|
|
222
|
+
/**
|
|
223
|
+
* Close unauthenticated media stream sockets if no valid `start` frame arrives in time.
|
|
224
|
+
* Protects against pre-auth idle connection hold attacks.
|
|
225
|
+
*/
|
|
226
|
+
preStartTimeoutMs: z.number().int().positive().default(5000),
|
|
227
|
+
/** Maximum number of concurrently pending (pre-start) media stream sockets. */
|
|
228
|
+
maxPendingConnections: z.number().int().positive().default(32),
|
|
229
|
+
/** Maximum pending media stream sockets per source IP. */
|
|
230
|
+
maxPendingConnectionsPerIp: z.number().int().positive().default(4),
|
|
231
|
+
/** Hard cap for all open media stream sockets (pending + active). */
|
|
232
|
+
maxConnections: z.number().int().positive().default(128),
|
|
222
233
|
})
|
|
223
234
|
.strict()
|
|
224
235
|
.default({
|
|
@@ -228,6 +239,10 @@ export const VoiceCallStreamingConfigSchema = z
|
|
|
228
239
|
silenceDurationMs: 800,
|
|
229
240
|
vadThreshold: 0.5,
|
|
230
241
|
streamPath: "/voice/stream",
|
|
242
|
+
preStartTimeoutMs: 5000,
|
|
243
|
+
maxPendingConnections: 32,
|
|
244
|
+
maxPendingConnectionsPerIp: 4,
|
|
245
|
+
maxConnections: 128,
|
|
231
246
|
});
|
|
232
247
|
export type VoiceCallStreamingConfig = z.infer<typeof VoiceCallStreamingConfigSchema>;
|
|
233
248
|
|
package/src/manager/context.ts
CHANGED
package/src/manager/events.test.ts
CHANGED
|
@@ -45,34 +45,57 @@ function createProvider(overrides: Partial<VoiceCallProvider> = {}): VoiceCallPr
|
|
|
45
45
|
};
|
|
46
46
|
}
|
|
47
47
|
|
|
48
|
+
function createInboundDisabledConfig() {
|
|
49
|
+
return VoiceCallConfigSchema.parse({
|
|
50
|
+
enabled: true,
|
|
51
|
+
provider: "plivo",
|
|
52
|
+
fromNumber: "+15550000000",
|
|
53
|
+
inboundPolicy: "disabled",
|
|
54
|
+
});
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
function createInboundInitiatedEvent(params: {
|
|
58
|
+
id: string;
|
|
59
|
+
providerCallId: string;
|
|
60
|
+
from: string;
|
|
61
|
+
}): NormalizedEvent {
|
|
62
|
+
return {
|
|
63
|
+
id: params.id,
|
|
64
|
+
type: "call.initiated",
|
|
65
|
+
callId: params.providerCallId,
|
|
66
|
+
providerCallId: params.providerCallId,
|
|
67
|
+
timestamp: Date.now(),
|
|
68
|
+
direction: "inbound",
|
|
69
|
+
from: params.from,
|
|
70
|
+
to: "+15550000000",
|
|
71
|
+
};
|
|
72
|
+
}
|
|
73
|
+
|
|
74
|
+
function createRejectingInboundContext(): {
|
|
75
|
+
ctx: CallManagerContext;
|
|
76
|
+
hangupCalls: HangupCallInput[];
|
|
77
|
+
} {
|
|
78
|
+
const hangupCalls: HangupCallInput[] = [];
|
|
79
|
+
const provider = createProvider({
|
|
80
|
+
hangupCall: async (input: HangupCallInput): Promise<void> => {
|
|
81
|
+
hangupCalls.push(input);
|
|
82
|
+
},
|
|
83
|
+
});
|
|
84
|
+
const ctx = createContext({
|
|
85
|
+
config: createInboundDisabledConfig(),
|
|
86
|
+
provider,
|
|
87
|
+
});
|
|
88
|
+
return { ctx, hangupCalls };
|
|
89
|
+
}
|
|
90
|
+
|
|
48
91
|
describe("processEvent (functional)", () => {
|
|
49
92
|
it("calls provider hangup when rejecting inbound call", () => {
|
|
50
|
-
const hangupCalls: HangupCallInput[] = [];
|
|
51
|
-
const provider = createProvider({
|
|
52
|
-
hangupCall: async (input: HangupCallInput): Promise<void> => {
|
|
53
|
-
hangupCalls.push(input);
|
|
54
|
-
},
|
|
55
|
-
});
|
|
56
|
-
|
|
57
|
-
const ctx = createContext({
|
|
58
|
-
config: VoiceCallConfigSchema.parse({
|
|
59
|
-
enabled: true,
|
|
60
|
-
provider: "plivo",
|
|
61
|
-
fromNumber: "+15550000000",
|
|
62
|
-
inboundPolicy: "disabled",
|
|
63
|
-
}),
|
|
64
|
-
provider,
|
|
65
|
-
});
|
|
66
|
-
const event: NormalizedEvent = {
|
|
93
|
+
const { ctx, hangupCalls } = createRejectingInboundContext();
|
|
94
|
+
const event = createInboundInitiatedEvent({
|
|
67
95
|
id: "evt-1",
|
|
68
|
-
type: "call.initiated",
|
|
69
|
-
callId: "prov-1",
|
|
70
96
|
providerCallId: "prov-1",
|
|
71
|
-
timestamp: Date.now(),
|
|
72
|
-
direction: "inbound",
|
|
73
97
|
from: "+15559999999",
|
|
74
|
-
|
|
75
|
-
};
|
|
98
|
+
});
|
|
76
99
|
|
|
77
100
|
processEvent(ctx, event);
|
|
78
101
|
|
|
@@ -87,24 +110,14 @@ describe("processEvent (functional)", () => {
|
|
|
87
110
|
|
|
88
111
|
it("does not call hangup when provider is null", () => {
|
|
89
112
|
const ctx = createContext({
|
|
90
|
-
config: VoiceCallConfigSchema.parse({
|
|
91
|
-
enabled: true,
|
|
92
|
-
provider: "plivo",
|
|
93
|
-
fromNumber: "+15550000000",
|
|
94
|
-
inboundPolicy: "disabled",
|
|
95
|
-
}),
|
|
113
|
+
config: createInboundDisabledConfig(),
|
|
96
114
|
provider: null,
|
|
97
115
|
});
|
|
98
|
-
const event: NormalizedEvent = {
|
|
116
|
+
const event = createInboundInitiatedEvent({
|
|
99
117
|
id: "evt-2",
|
|
100
|
-
type: "call.initiated",
|
|
101
|
-
callId: "prov-2",
|
|
102
118
|
providerCallId: "prov-2",
|
|
103
|
-
timestamp: Date.now(),
|
|
104
|
-
direction: "inbound",
|
|
105
119
|
from: "+15551111111",
|
|
106
|
-
|
|
107
|
-
};
|
|
120
|
+
});
|
|
108
121
|
|
|
109
122
|
processEvent(ctx, event);
|
|
110
123
|
|
|
@@ -112,31 +125,12 @@ describe("processEvent (functional)", () => {
|
|
|
112
125
|
});
|
|
113
126
|
|
|
114
127
|
it("calls hangup only once for duplicate events for same rejected call", () => {
|
|
115
|
-
const hangupCalls: HangupCallInput[] = [];
|
|
116
|
-
const provider = createProvider({
|
|
117
|
-
hangupCall: async (input: HangupCallInput): Promise<void> => {
|
|
118
|
-
hangupCalls.push(input);
|
|
119
|
-
},
|
|
120
|
-
});
|
|
121
|
-
const ctx = createContext({
|
|
122
|
-
config: VoiceCallConfigSchema.parse({
|
|
123
|
-
enabled: true,
|
|
124
|
-
provider: "plivo",
|
|
125
|
-
fromNumber: "+15550000000",
|
|
126
|
-
inboundPolicy: "disabled",
|
|
127
|
-
}),
|
|
128
|
-
provider,
|
|
129
|
-
});
|
|
130
|
-
const event1: NormalizedEvent = {
|
|
128
|
+
const { ctx, hangupCalls } = createRejectingInboundContext();
|
|
129
|
+
const event1 = createInboundInitiatedEvent({
|
|
131
130
|
id: "evt-init",
|
|
132
|
-
type: "call.initiated",
|
|
133
|
-
callId: "prov-dup",
|
|
134
131
|
providerCallId: "prov-dup",
|
|
135
|
-
timestamp: Date.now(),
|
|
136
|
-
direction: "inbound",
|
|
137
132
|
from: "+15552222222",
|
|
138
|
-
|
|
139
|
-
};
|
|
133
|
+
});
|
|
140
134
|
const event2: NormalizedEvent = {
|
|
141
135
|
id: "evt-ring",
|
|
142
136
|
type: "call.ringing",
|
|
@@ -228,26 +222,61 @@ describe("processEvent (functional)", () => {
|
|
|
228
222
|
},
|
|
229
223
|
});
|
|
230
224
|
const ctx = createContext({
|
|
231
|
-
config: VoiceCallConfigSchema.parse({
|
|
232
|
-
enabled: true,
|
|
233
|
-
provider: "plivo",
|
|
234
|
-
fromNumber: "+15550000000",
|
|
235
|
-
inboundPolicy: "disabled",
|
|
236
|
-
}),
|
|
225
|
+
config: createInboundDisabledConfig(),
|
|
237
226
|
provider,
|
|
238
227
|
});
|
|
239
|
-
const event: NormalizedEvent = {
|
|
228
|
+
const event = createInboundInitiatedEvent({
|
|
240
229
|
id: "evt-fail",
|
|
241
|
-
type: "call.initiated",
|
|
242
|
-
callId: "prov-fail",
|
|
243
230
|
providerCallId: "prov-fail",
|
|
244
|
-
timestamp: Date.now(),
|
|
245
|
-
direction: "inbound",
|
|
246
231
|
from: "+15553333333",
|
|
247
|
-
|
|
248
|
-
};
|
|
232
|
+
});
|
|
249
233
|
|
|
250
234
|
expect(() => processEvent(ctx, event)).not.toThrow();
|
|
251
235
|
expect(ctx.activeCalls.size).toBe(0);
|
|
252
236
|
});
|
|
237
|
+
|
|
238
|
+
it("deduplicates by dedupeKey even when event IDs differ", () => {
|
|
239
|
+
const now = Date.now();
|
|
240
|
+
const ctx = createContext();
|
|
241
|
+
ctx.activeCalls.set("call-dedupe", {
|
|
242
|
+
callId: "call-dedupe",
|
|
243
|
+
providerCallId: "provider-dedupe",
|
|
244
|
+
provider: "plivo",
|
|
245
|
+
direction: "outbound",
|
|
246
|
+
state: "answered",
|
|
247
|
+
from: "+15550000000",
|
|
248
|
+
to: "+15550000001",
|
|
249
|
+
startedAt: now,
|
|
250
|
+
transcript: [],
|
|
251
|
+
processedEventIds: [],
|
|
252
|
+
metadata: {},
|
|
253
|
+
});
|
|
254
|
+
ctx.providerCallIdMap.set("provider-dedupe", "call-dedupe");
|
|
255
|
+
|
|
256
|
+
processEvent(ctx, {
|
|
257
|
+
id: "evt-1",
|
|
258
|
+
dedupeKey: "stable-key-1",
|
|
259
|
+
type: "call.speech",
|
|
260
|
+
callId: "call-dedupe",
|
|
261
|
+
providerCallId: "provider-dedupe",
|
|
262
|
+
timestamp: now + 1,
|
|
263
|
+
transcript: "hello",
|
|
264
|
+
isFinal: true,
|
|
265
|
+
});
|
|
266
|
+
|
|
267
|
+
processEvent(ctx, {
|
|
268
|
+
id: "evt-2",
|
|
269
|
+
dedupeKey: "stable-key-1",
|
|
270
|
+
type: "call.speech",
|
|
271
|
+
callId: "call-dedupe",
|
|
272
|
+
providerCallId: "provider-dedupe",
|
|
273
|
+
timestamp: now + 2,
|
|
274
|
+
transcript: "hello",
|
|
275
|
+
isFinal: true,
|
|
276
|
+
});
|
|
277
|
+
|
|
278
|
+
const call = ctx.activeCalls.get("call-dedupe");
|
|
279
|
+
expect(call?.transcript).toHaveLength(1);
|
|
280
|
+
expect(Array.from(ctx.processedEventIds)).toEqual(["stable-key-1"]);
|
|
281
|
+
});
|
|
253
282
|
});
|
package/src/manager/events.ts
CHANGED
|
@@ -92,10 +92,11 @@ function createInboundCall(params: {
|
|
|
92
92
|
}
|
|
93
93
|
|
|
94
94
|
export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
|
|
95
|
-
if (ctx.processedEventIds.has(event.id)) {
|
95
|
+
const dedupeKey = event.dedupeKey || event.id;
|
|
96
|
+
if (ctx.processedEventIds.has(dedupeKey)) {
|
|
96
97
|
return;
|
|
97
98
|
}
|
|
98
|
-
ctx.processedEventIds.add(event.id);
|
|
99
|
+
ctx.processedEventIds.add(dedupeKey);
|
|
99
100
|
|
|
100
101
|
let call = findCall({
|
|
101
102
|
activeCalls: ctx.activeCalls,
|
|
@@ -158,7 +159,7 @@ export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
|
|
|
158
159
|
}
|
|
159
160
|
}
|
|
160
161
|
|
|
161
|
-
call.processedEventIds.push(event.id);
|
|
162
|
+
call.processedEventIds.push(dedupeKey);
|
|
162
163
|
|
|
163
164
|
switch (event.type) {
|
|
164
165
|
case "call.initiated":
|
|
@@ -192,8 +193,20 @@ export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
|
|
|
192
193
|
|
|
193
194
|
case "call.speech":
|
|
194
195
|
if (event.isFinal) {
|
|
196
|
+
const hadWaiter = ctx.transcriptWaiters.has(call.callId);
|
|
197
|
+
const resolved = resolveTranscriptWaiter(
|
|
198
|
+
ctx,
|
|
199
|
+
call.callId,
|
|
200
|
+
event.transcript,
|
|
201
|
+
event.turnToken,
|
|
202
|
+
);
|
|
203
|
+
if (hadWaiter && !resolved) {
|
|
204
|
+
console.warn(
|
|
205
|
+
`[voice-call] Ignoring speech event with mismatched turn token for ${call.callId}`,
|
|
206
|
+
);
|
|
207
|
+
break;
|
|
208
|
+
}
|
|
195
209
|
addTranscriptEntry(call, "user", event.transcript);
|
|
196
|
-
resolveTranscriptWaiter(ctx, call.callId, event.transcript);
|
|
197
210
|
}
|
|
198
211
|
transitionState(call, "listening");
|
|
199
212
|
break;
|
package/src/manager/outbound.ts
CHANGED
|
@@ -51,6 +51,57 @@ type EndCallContext = Pick<
|
|
|
51
51
|
| "maxDurationTimers"
|
|
52
52
|
>;
|
|
53
53
|
|
|
54
|
+
type ConnectedCallContext = Pick<CallManagerContext, "activeCalls" | "provider">;
|
|
55
|
+
|
|
56
|
+
type ConnectedCallLookup =
|
|
57
|
+
| { kind: "error"; error: string }
|
|
58
|
+
| { kind: "ended"; call: CallRecord }
|
|
59
|
+
| {
|
|
60
|
+
kind: "ok";
|
|
61
|
+
call: CallRecord;
|
|
62
|
+
providerCallId: string;
|
|
63
|
+
provider: NonNullable<ConnectedCallContext["provider"]>;
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
type ConnectedCallResolution =
|
|
67
|
+
| { ok: false; error: string }
|
|
68
|
+
| {
|
|
69
|
+
ok: true;
|
|
70
|
+
call: CallRecord;
|
|
71
|
+
providerCallId: string;
|
|
72
|
+
provider: NonNullable<ConnectedCallContext["provider"]>;
|
|
73
|
+
};
|
|
74
|
+
|
|
75
|
+
function lookupConnectedCall(ctx: ConnectedCallContext, callId: CallId): ConnectedCallLookup {
|
|
76
|
+
const call = ctx.activeCalls.get(callId);
|
|
77
|
+
if (!call) {
|
|
78
|
+
return { kind: "error", error: "Call not found" };
|
|
79
|
+
}
|
|
80
|
+
if (!ctx.provider || !call.providerCallId) {
|
|
81
|
+
return { kind: "error", error: "Call not connected" };
|
|
82
|
+
}
|
|
83
|
+
if (TerminalStates.has(call.state)) {
|
|
84
|
+
return { kind: "ended", call };
|
|
85
|
+
}
|
|
86
|
+
return { kind: "ok", call, providerCallId: call.providerCallId, provider: ctx.provider };
|
|
87
|
+
}
|
|
88
|
+
|
|
89
|
+
function requireConnectedCall(ctx: ConnectedCallContext, callId: CallId): ConnectedCallResolution {
|
|
90
|
+
const lookup = lookupConnectedCall(ctx, callId);
|
|
91
|
+
if (lookup.kind === "error") {
|
|
92
|
+
return { ok: false, error: lookup.error };
|
|
93
|
+
}
|
|
94
|
+
if (lookup.kind === "ended") {
|
|
95
|
+
return { ok: false, error: "Call has ended" };
|
|
96
|
+
}
|
|
97
|
+
return {
|
|
98
|
+
ok: true,
|
|
99
|
+
call: lookup.call,
|
|
100
|
+
providerCallId: lookup.providerCallId,
|
|
101
|
+
provider: lookup.provider,
|
|
102
|
+
};
|
|
103
|
+
}
|
|
104
|
+
|
|
54
105
|
export async function initiateCall(
|
|
55
106
|
ctx: InitiateContext,
|
|
56
107
|
to: string,
|
|
@@ -149,26 +200,22 @@ export async function speak(
|
|
|
149
200
|
callId: CallId,
|
|
150
201
|
text: string,
|
|
151
202
|
): Promise<{ success: boolean; error?: string }> {
|
|
152
|
-
const call = ctx.activeCalls.get(callId);
|
|
153
|
-
if (!call) {
|
|
154
|
-
return { success: false, error: "Call not found" };
|
|
155
|
-
}
|
|
156
|
-
if (!ctx.provider || !call.providerCallId) {
|
|
157
|
-
return { success: false, error: "Call not connected" };
|
|
158
|
-
}
|
|
159
|
-
if (TerminalStates.has(call.state)) {
|
|
160
|
-
return { success: false, error: "Call has ended" };
|
|
203
|
+
const connected = requireConnectedCall(ctx, callId);
|
|
204
|
+
if (!connected.ok) {
|
|
205
|
+
return { success: false, error: connected.error };
|
|
161
206
|
}
|
|
207
|
+
const { call, providerCallId, provider } = connected;
|
|
208
|
+
|
|
162
209
|
try {
|
|
163
210
|
transitionState(call, "speaking");
|
|
164
211
|
persistCallRecord(ctx.storePath, call);
|
|
165
212
|
|
|
166
213
|
addTranscriptEntry(call, "bot", text);
|
|
167
214
|
|
|
168
|
-
const voice =
|
|
169
|
-
await ctx.provider.playTts({
|
|
215
|
+
const voice = provider.name === "twilio" ? ctx.config.tts?.openai?.voice : undefined;
|
|
216
|
+
await provider.playTts({
|
|
170
217
|
callId,
|
|
171
|
-
providerCallId: call.providerCallId,
|
|
218
|
+
providerCallId,
|
|
172
219
|
text,
|
|
173
220
|
voice,
|
|
174
221
|
});
|
|
@@ -232,22 +279,19 @@ export async function continueCall(
|
|
|
232
279
|
callId: CallId,
|
|
233
280
|
prompt: string,
|
|
234
281
|
): Promise<{ success: boolean; transcript?: string; error?: string }> {
|
|
235
|
-
const call = ctx.activeCalls.get(callId);
|
|
236
|
-
if (!call) {
|
|
237
|
-
return { success: false, error: "Call not found" };
|
|
238
|
-
}
|
|
239
|
-
if (!ctx.provider || !call.providerCallId) {
|
|
240
|
-
return { success: false, error: "Call not connected" };
|
|
241
|
-
}
|
|
242
|
-
if (TerminalStates.has(call.state)) {
|
|
243
|
-
return { success: false, error: "Call has ended" };
|
|
282
|
+
const connected = requireConnectedCall(ctx, callId);
|
|
283
|
+
if (!connected.ok) {
|
|
284
|
+
return { success: false, error: connected.error };
|
|
244
285
|
}
|
|
286
|
+
const { call, providerCallId, provider } = connected;
|
|
287
|
+
|
|
245
288
|
if (ctx.activeTurnCalls.has(callId) || ctx.transcriptWaiters.has(callId)) {
|
|
246
289
|
return { success: false, error: "Already waiting for transcript" };
|
|
247
290
|
}
|
|
248
291
|
ctx.activeTurnCalls.add(callId);
|
|
249
292
|
|
|
250
293
|
const turnStartedAt = Date.now();
|
|
294
|
+
const turnToken = provider.name === "twilio" ? crypto.randomUUID() : undefined;
|
|
251
295
|
|
|
252
296
|
try {
|
|
253
297
|
await speak(ctx, callId, prompt);
|
|
@@ -256,13 +300,13 @@ export async function continueCall(
|
|
|
256
300
|
persistCallRecord(ctx.storePath, call);
|
|
257
301
|
|
|
258
302
|
const listenStartedAt = Date.now();
|
|
259
|
-
await ctx.provider.startListening({ callId, providerCallId: call.providerCallId });
|
|
303
|
+
await provider.startListening({ callId, providerCallId, turnToken });
|
|
260
304
|
|
|
261
|
-
const transcript = await waitForFinalTranscript(ctx, callId);
|
|
305
|
+
const transcript = await waitForFinalTranscript(ctx, callId, turnToken);
|
|
262
306
|
const transcriptReceivedAt = Date.now();
|
|
263
307
|
|
|
264
308
|
// Best-effort: stop listening after final transcript.
|
|
265
|
-
await ctx.provider.stopListening({ callId, providerCallId: call.providerCallId });
|
|
309
|
+
await provider.stopListening({ callId, providerCallId });
|
|
266
310
|
|
|
267
311
|
const lastTurnLatencyMs = transcriptReceivedAt - turnStartedAt;
|
|
268
312
|
const lastTurnListenWaitMs = transcriptReceivedAt - listenStartedAt;
|
|
@@ -302,21 +346,19 @@ export async function endCall(
|
|
|
302
346
|
ctx: EndCallContext,
|
|
303
347
|
callId: CallId,
|
|
304
348
|
): Promise<{ success: boolean; error?: string }> {
|
|
305
|
-
const call = ctx.activeCalls.get(callId);
|
|
306
|
-
if (!call) {
|
|
307
|
-
return { success: false, error: "Call not found" };
|
|
349
|
+
const lookup = lookupConnectedCall(ctx, callId);
|
|
350
|
+
if (lookup.kind === "error") {
|
|
351
|
+
return { success: false, error: lookup.error };
|
|
308
352
|
}
|
|
309
|
-
if (!ctx.provider || !call.providerCallId) {
|
|
310
|
-
return { success: false, error: "Call not connected" };
|
|
311
|
-
}
|
|
312
|
-
if (TerminalStates.has(call.state)) {
|
|
353
|
+
if (lookup.kind === "ended") {
|
|
313
354
|
return { success: true };
|
|
314
355
|
}
|
|
356
|
+
const { call, providerCallId, provider } = lookup;
|
|
315
357
|
|
|
316
358
|
try {
|
|
317
|
-
await ctx.provider.hangupCall({
|
|
359
|
+
await provider.hangupCall({
|
|
318
360
|
callId,
|
|
319
|
-
providerCallId: call.providerCallId,
|
|
361
|
+
providerCallId,
|
|
320
362
|
reason: "hangup-bot",
|
|
321
363
|
});
|
|
322
364
|
|
|
@@ -329,9 +371,7 @@ export async function endCall(
|
|
|
329
371
|
rejectTranscriptWaiter(ctx, callId, "Call ended: hangup-bot");
|
|
330
372
|
|
|
331
373
|
ctx.activeCalls.delete(callId);
|
|
332
|
-
if (call.providerCallId) {
|
333
|
-
ctx.providerCallIdMap.delete(call.providerCallId);
|
|
334
|
-
}
|
|
374
|
+
ctx.providerCallIdMap.delete(providerCallId);
|
|
335
375
|
|
|
336
376
|
return { success: true };
|
|
337
377
|
} catch (err) {
|
package/src/manager/timers.ts
CHANGED
|
@@ -77,16 +77,25 @@ export function resolveTranscriptWaiter(
|
|
|
77
77
|
ctx: TranscriptWaiterContext,
|
|
78
78
|
callId: CallId,
|
|
79
79
|
transcript: string,
|
|
80
|
-
): void {
|
80
|
+
turnToken?: string,
|
|
81
|
+
): boolean {
|
|
81
82
|
const waiter = ctx.transcriptWaiters.get(callId);
|
|
82
83
|
if (!waiter) {
|
|
83
|
-
return;
|
|
84
|
+
return false;
|
|
85
|
+
}
|
|
86
|
+
if (waiter.turnToken && waiter.turnToken !== turnToken) {
|
|
87
|
+
return false;
|
|
84
88
|
}
|
|
85
89
|
clearTranscriptWaiter(ctx, callId);
|
|
86
90
|
waiter.resolve(transcript);
|
|
91
|
+
return true;
|
|
87
92
|
}
|
|
88
93
|
|
|
89
|
-
export function waitForFinalTranscript(ctx: TimerContext, callId: CallId): Promise<string> {
|
|
94
|
+
export function waitForFinalTranscript(
|
|
95
|
+
ctx: TimerContext,
|
|
96
|
+
callId: CallId,
|
|
97
|
+
turnToken?: string,
|
|
98
|
+
): Promise<string> {
|
|
90
99
|
if (ctx.transcriptWaiters.has(callId)) {
|
|
91
100
|
return Promise.reject(new Error("Already waiting for transcript"));
|
|
92
101
|
}
|
|
@@ -98,6 +107,6 @@ export function waitForFinalTranscript(ctx: TimerContext, callId: CallId): Promi
|
|
|
98
107
|
reject(new Error(`Timed out waiting for transcript after ${timeoutMs}ms`));
|
|
99
108
|
}, timeoutMs);
|
|
100
109
|
|
|
101
|
-
ctx.transcriptWaiters.set(callId, { resolve, reject, timeout });
|
|
110
|
+
ctx.transcriptWaiters.set(callId, { resolve, reject, timeout, turnToken });
|
|
102
111
|
});
|
|
103
112
|
}
|