@openclaw/voice-call 2026.2.21 → 2026.2.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # Changelog
2
2
 
3
+ ## 2026.2.22
4
+
5
+ ### Changes
6
+
7
+ - Version alignment with core OpenClaw release numbers.
8
+
3
9
  ## 2026.1.26
4
10
 
5
11
  ### Changes
package/README.md CHANGED
@@ -76,6 +76,10 @@ Put under `plugins.entries.voice-call.config`:
76
76
  streaming: {
77
77
  enabled: true,
78
78
  streamPath: "/voice/stream",
79
+ preStartTimeoutMs: 5000,
80
+ maxPendingConnections: 32,
81
+ maxPendingConnectionsPerIp: 4,
82
+ maxConnections: 128,
79
83
  },
80
84
  }
81
85
  ```
@@ -87,6 +91,13 @@ Notes:
87
91
  - Telnyx requires `telnyx.publicKey` (or `TELNYX_PUBLIC_KEY`) unless `skipSignatureVerification` is true.
88
92
  - `tunnel.allowNgrokFreeTierLoopbackBypass: true` allows Twilio webhooks with invalid signatures **only** when `tunnel.provider="ngrok"` and `serve.bind` is loopback (ngrok local agent). Use for local dev only.
89
93
 
94
+ Streaming security defaults:
95
+
96
+ - `streaming.preStartTimeoutMs` closes sockets that never send a valid `start` frame.
97
+ - `streaming.maxPendingConnections` caps total unauthenticated pre-start sockets.
98
+ - `streaming.maxPendingConnectionsPerIp` caps unauthenticated pre-start sockets per source IP.
99
+ - `streaming.maxConnections` caps total open media stream sockets (pending + active).
100
+
90
101
  ## Stale call reaper
91
102
 
92
103
  Use `staleCallReaperSeconds` to end calls that never receive a terminal webhook
@@ -164,5 +175,7 @@ Actions:
164
175
  ## Notes
165
176
 
166
177
  - Uses webhook signature verification for Twilio/Telnyx/Plivo.
178
+ - Adds replay protection for Twilio and Plivo webhooks (valid duplicate callbacks are ignored safely).
179
+ - Twilio speech turns include a per-turn token so stale/replayed callbacks cannot complete a newer turn.
167
180
  - `responseModel` / `responseSystemPrompt` control AI auto-responses.
168
181
  - Media streaming requires `ws` and OpenAI Realtime API key.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openclaw/voice-call",
3
- "version": "2026.2.21",
3
+ "version": "2026.2.23",
4
4
  "description": "OpenClaw voice-call plugin",
5
5
  "type": "module",
6
6
  "dependencies": {
package/src/cli.ts CHANGED
@@ -81,6 +81,27 @@ function summarizeSeries(values: number[]): {
81
81
  };
82
82
  }
83
83
 
84
+ function resolveCallMode(mode?: string): "notify" | "conversation" | undefined {
85
+ return mode === "notify" || mode === "conversation" ? mode : undefined;
86
+ }
87
+
88
+ async function initiateCallAndPrintId(params: {
89
+ runtime: VoiceCallRuntime;
90
+ to: string;
91
+ message?: string;
92
+ mode?: string;
93
+ }) {
94
+ const result = await params.runtime.manager.initiateCall(params.to, undefined, {
95
+ message: params.message,
96
+ mode: resolveCallMode(params.mode),
97
+ });
98
+ if (!result.success) {
99
+ throw new Error(result.error || "initiate failed");
100
+ }
101
+ // eslint-disable-next-line no-console
102
+ console.log(JSON.stringify({ callId: result.callId }, null, 2));
103
+ }
104
+
84
105
  export function registerVoiceCallCli(params: {
85
106
  program: Command;
86
107
  config: VoiceCallConfig;
@@ -112,16 +133,12 @@ export function registerVoiceCallCli(params: {
112
133
  if (!to) {
113
134
  throw new Error("Missing --to and no toNumber configured");
114
135
  }
115
- const result = await rt.manager.initiateCall(to, undefined, {
136
+ await initiateCallAndPrintId({
137
+ runtime: rt,
138
+ to,
116
139
  message: options.message,
117
- mode:
118
- options.mode === "notify" || options.mode === "conversation" ? options.mode : undefined,
140
+ mode: options.mode,
119
141
  });
120
- if (!result.success) {
121
- throw new Error(result.error || "initiate failed");
122
- }
123
- // eslint-disable-next-line no-console
124
- console.log(JSON.stringify({ callId: result.callId }, null, 2));
125
142
  });
126
143
 
127
144
  root
@@ -136,16 +153,12 @@ export function registerVoiceCallCli(params: {
136
153
  )
137
154
  .action(async (options: { to: string; message?: string; mode?: string }) => {
138
155
  const rt = await ensureRuntime();
139
- const result = await rt.manager.initiateCall(options.to, undefined, {
156
+ await initiateCallAndPrintId({
157
+ runtime: rt,
158
+ to: options.to,
140
159
  message: options.message,
141
- mode:
142
- options.mode === "notify" || options.mode === "conversation" ? options.mode : undefined,
160
+ mode: options.mode,
143
161
  });
144
- if (!result.success) {
145
- throw new Error(result.error || "initiate failed");
146
- }
147
- // eslint-disable-next-line no-console
148
- console.log(JSON.stringify({ callId: result.callId }, null, 2));
149
162
  });
150
163
 
151
164
  root
@@ -30,6 +30,10 @@ function createBaseConfig(provider: "telnyx" | "twilio" | "plivo" | "mock"): Voi
30
30
  silenceDurationMs: 800,
31
31
  vadThreshold: 0.5,
32
32
  streamPath: "/voice/stream",
33
+ preStartTimeoutMs: 5000,
34
+ maxPendingConnections: 32,
35
+ maxPendingConnectionsPerIp: 4,
36
+ maxConnections: 128,
33
37
  },
34
38
  skipSignatureVerification: false,
35
39
  stt: { provider: "openai", model: "whisper-1" },
package/src/config.ts CHANGED
@@ -219,6 +219,17 @@ export const VoiceCallStreamingConfigSchema = z
219
219
  vadThreshold: z.number().min(0).max(1).default(0.5),
220
220
  /** WebSocket path for media stream connections */
221
221
  streamPath: z.string().min(1).default("/voice/stream"),
222
+ /**
223
+ * Close unauthenticated media stream sockets if no valid `start` frame arrives in time.
224
+ * Protects against pre-auth idle connection hold attacks.
225
+ */
226
+ preStartTimeoutMs: z.number().int().positive().default(5000),
227
+ /** Maximum number of concurrently pending (pre-start) media stream sockets. */
228
+ maxPendingConnections: z.number().int().positive().default(32),
229
+ /** Maximum pending media stream sockets per source IP. */
230
+ maxPendingConnectionsPerIp: z.number().int().positive().default(4),
231
+ /** Hard cap for all open media stream sockets (pending + active). */
232
+ maxConnections: z.number().int().positive().default(128),
222
233
  })
223
234
  .strict()
224
235
  .default({
@@ -228,6 +239,10 @@ export const VoiceCallStreamingConfigSchema = z
228
239
  silenceDurationMs: 800,
229
240
  vadThreshold: 0.5,
230
241
  streamPath: "/voice/stream",
242
+ preStartTimeoutMs: 5000,
243
+ maxPendingConnections: 32,
244
+ maxPendingConnectionsPerIp: 4,
245
+ maxConnections: 128,
231
246
  });
232
247
  export type VoiceCallStreamingConfig = z.infer<typeof VoiceCallStreamingConfigSchema>;
233
248
 
@@ -6,6 +6,7 @@ export type TranscriptWaiter = {
6
6
  resolve: (text: string) => void;
7
7
  reject: (err: Error) => void;
8
8
  timeout: NodeJS.Timeout;
9
+ turnToken?: string;
9
10
  };
10
11
 
11
12
  export type CallManagerRuntimeState = {
@@ -45,34 +45,57 @@ function createProvider(overrides: Partial<VoiceCallProvider> = {}): VoiceCallPr
45
45
  };
46
46
  }
47
47
 
48
+ function createInboundDisabledConfig() {
49
+ return VoiceCallConfigSchema.parse({
50
+ enabled: true,
51
+ provider: "plivo",
52
+ fromNumber: "+15550000000",
53
+ inboundPolicy: "disabled",
54
+ });
55
+ }
56
+
57
+ function createInboundInitiatedEvent(params: {
58
+ id: string;
59
+ providerCallId: string;
60
+ from: string;
61
+ }): NormalizedEvent {
62
+ return {
63
+ id: params.id,
64
+ type: "call.initiated",
65
+ callId: params.providerCallId,
66
+ providerCallId: params.providerCallId,
67
+ timestamp: Date.now(),
68
+ direction: "inbound",
69
+ from: params.from,
70
+ to: "+15550000000",
71
+ };
72
+ }
73
+
74
+ function createRejectingInboundContext(): {
75
+ ctx: CallManagerContext;
76
+ hangupCalls: HangupCallInput[];
77
+ } {
78
+ const hangupCalls: HangupCallInput[] = [];
79
+ const provider = createProvider({
80
+ hangupCall: async (input: HangupCallInput): Promise<void> => {
81
+ hangupCalls.push(input);
82
+ },
83
+ });
84
+ const ctx = createContext({
85
+ config: createInboundDisabledConfig(),
86
+ provider,
87
+ });
88
+ return { ctx, hangupCalls };
89
+ }
90
+
48
91
  describe("processEvent (functional)", () => {
49
92
  it("calls provider hangup when rejecting inbound call", () => {
50
- const hangupCalls: HangupCallInput[] = [];
51
- const provider = createProvider({
52
- hangupCall: async (input: HangupCallInput): Promise<void> => {
53
- hangupCalls.push(input);
54
- },
55
- });
56
-
57
- const ctx = createContext({
58
- config: VoiceCallConfigSchema.parse({
59
- enabled: true,
60
- provider: "plivo",
61
- fromNumber: "+15550000000",
62
- inboundPolicy: "disabled",
63
- }),
64
- provider,
65
- });
66
- const event: NormalizedEvent = {
93
+ const { ctx, hangupCalls } = createRejectingInboundContext();
94
+ const event = createInboundInitiatedEvent({
67
95
  id: "evt-1",
68
- type: "call.initiated",
69
- callId: "prov-1",
70
96
  providerCallId: "prov-1",
71
- timestamp: Date.now(),
72
- direction: "inbound",
73
97
  from: "+15559999999",
74
- to: "+15550000000",
75
- };
98
+ });
76
99
 
77
100
  processEvent(ctx, event);
78
101
 
@@ -87,24 +110,14 @@ describe("processEvent (functional)", () => {
87
110
 
88
111
  it("does not call hangup when provider is null", () => {
89
112
  const ctx = createContext({
90
- config: VoiceCallConfigSchema.parse({
91
- enabled: true,
92
- provider: "plivo",
93
- fromNumber: "+15550000000",
94
- inboundPolicy: "disabled",
95
- }),
113
+ config: createInboundDisabledConfig(),
96
114
  provider: null,
97
115
  });
98
- const event: NormalizedEvent = {
116
+ const event = createInboundInitiatedEvent({
99
117
  id: "evt-2",
100
- type: "call.initiated",
101
- callId: "prov-2",
102
118
  providerCallId: "prov-2",
103
- timestamp: Date.now(),
104
- direction: "inbound",
105
119
  from: "+15551111111",
106
- to: "+15550000000",
107
- };
120
+ });
108
121
 
109
122
  processEvent(ctx, event);
110
123
 
@@ -112,31 +125,12 @@ describe("processEvent (functional)", () => {
112
125
  });
113
126
 
114
127
  it("calls hangup only once for duplicate events for same rejected call", () => {
115
- const hangupCalls: HangupCallInput[] = [];
116
- const provider = createProvider({
117
- hangupCall: async (input: HangupCallInput): Promise<void> => {
118
- hangupCalls.push(input);
119
- },
120
- });
121
- const ctx = createContext({
122
- config: VoiceCallConfigSchema.parse({
123
- enabled: true,
124
- provider: "plivo",
125
- fromNumber: "+15550000000",
126
- inboundPolicy: "disabled",
127
- }),
128
- provider,
129
- });
130
- const event1: NormalizedEvent = {
128
+ const { ctx, hangupCalls } = createRejectingInboundContext();
129
+ const event1 = createInboundInitiatedEvent({
131
130
  id: "evt-init",
132
- type: "call.initiated",
133
- callId: "prov-dup",
134
131
  providerCallId: "prov-dup",
135
- timestamp: Date.now(),
136
- direction: "inbound",
137
132
  from: "+15552222222",
138
- to: "+15550000000",
139
- };
133
+ });
140
134
  const event2: NormalizedEvent = {
141
135
  id: "evt-ring",
142
136
  type: "call.ringing",
@@ -228,26 +222,61 @@ describe("processEvent (functional)", () => {
228
222
  },
229
223
  });
230
224
  const ctx = createContext({
231
- config: VoiceCallConfigSchema.parse({
232
- enabled: true,
233
- provider: "plivo",
234
- fromNumber: "+15550000000",
235
- inboundPolicy: "disabled",
236
- }),
225
+ config: createInboundDisabledConfig(),
237
226
  provider,
238
227
  });
239
- const event: NormalizedEvent = {
228
+ const event = createInboundInitiatedEvent({
240
229
  id: "evt-fail",
241
- type: "call.initiated",
242
- callId: "prov-fail",
243
230
  providerCallId: "prov-fail",
244
- timestamp: Date.now(),
245
- direction: "inbound",
246
231
  from: "+15553333333",
247
- to: "+15550000000",
248
- };
232
+ });
249
233
 
250
234
  expect(() => processEvent(ctx, event)).not.toThrow();
251
235
  expect(ctx.activeCalls.size).toBe(0);
252
236
  });
237
+
238
+ it("deduplicates by dedupeKey even when event IDs differ", () => {
239
+ const now = Date.now();
240
+ const ctx = createContext();
241
+ ctx.activeCalls.set("call-dedupe", {
242
+ callId: "call-dedupe",
243
+ providerCallId: "provider-dedupe",
244
+ provider: "plivo",
245
+ direction: "outbound",
246
+ state: "answered",
247
+ from: "+15550000000",
248
+ to: "+15550000001",
249
+ startedAt: now,
250
+ transcript: [],
251
+ processedEventIds: [],
252
+ metadata: {},
253
+ });
254
+ ctx.providerCallIdMap.set("provider-dedupe", "call-dedupe");
255
+
256
+ processEvent(ctx, {
257
+ id: "evt-1",
258
+ dedupeKey: "stable-key-1",
259
+ type: "call.speech",
260
+ callId: "call-dedupe",
261
+ providerCallId: "provider-dedupe",
262
+ timestamp: now + 1,
263
+ transcript: "hello",
264
+ isFinal: true,
265
+ });
266
+
267
+ processEvent(ctx, {
268
+ id: "evt-2",
269
+ dedupeKey: "stable-key-1",
270
+ type: "call.speech",
271
+ callId: "call-dedupe",
272
+ providerCallId: "provider-dedupe",
273
+ timestamp: now + 2,
274
+ transcript: "hello",
275
+ isFinal: true,
276
+ });
277
+
278
+ const call = ctx.activeCalls.get("call-dedupe");
279
+ expect(call?.transcript).toHaveLength(1);
280
+ expect(Array.from(ctx.processedEventIds)).toEqual(["stable-key-1"]);
281
+ });
253
282
  });
@@ -92,10 +92,11 @@ function createInboundCall(params: {
92
92
  }
93
93
 
94
94
  export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
95
- if (ctx.processedEventIds.has(event.id)) {
95
+ const dedupeKey = event.dedupeKey || event.id;
96
+ if (ctx.processedEventIds.has(dedupeKey)) {
96
97
  return;
97
98
  }
98
- ctx.processedEventIds.add(event.id);
99
+ ctx.processedEventIds.add(dedupeKey);
99
100
 
100
101
  let call = findCall({
101
102
  activeCalls: ctx.activeCalls,
@@ -158,7 +159,7 @@ export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
158
159
  }
159
160
  }
160
161
 
161
- call.processedEventIds.push(event.id);
162
+ call.processedEventIds.push(dedupeKey);
162
163
 
163
164
  switch (event.type) {
164
165
  case "call.initiated":
@@ -192,8 +193,20 @@ export function processEvent(ctx: EventContext, event: NormalizedEvent): void {
192
193
 
193
194
  case "call.speech":
194
195
  if (event.isFinal) {
196
+ const hadWaiter = ctx.transcriptWaiters.has(call.callId);
197
+ const resolved = resolveTranscriptWaiter(
198
+ ctx,
199
+ call.callId,
200
+ event.transcript,
201
+ event.turnToken,
202
+ );
203
+ if (hadWaiter && !resolved) {
204
+ console.warn(
205
+ `[voice-call] Ignoring speech event with mismatched turn token for ${call.callId}`,
206
+ );
207
+ break;
208
+ }
195
209
  addTranscriptEntry(call, "user", event.transcript);
196
- resolveTranscriptWaiter(ctx, call.callId, event.transcript);
197
210
  }
198
211
  transitionState(call, "listening");
199
212
  break;
@@ -51,6 +51,57 @@ type EndCallContext = Pick<
51
51
  | "maxDurationTimers"
52
52
  >;
53
53
 
54
+ type ConnectedCallContext = Pick<CallManagerContext, "activeCalls" | "provider">;
55
+
56
+ type ConnectedCallLookup =
57
+ | { kind: "error"; error: string }
58
+ | { kind: "ended"; call: CallRecord }
59
+ | {
60
+ kind: "ok";
61
+ call: CallRecord;
62
+ providerCallId: string;
63
+ provider: NonNullable<ConnectedCallContext["provider"]>;
64
+ };
65
+
66
+ type ConnectedCallResolution =
67
+ | { ok: false; error: string }
68
+ | {
69
+ ok: true;
70
+ call: CallRecord;
71
+ providerCallId: string;
72
+ provider: NonNullable<ConnectedCallContext["provider"]>;
73
+ };
74
+
75
+ function lookupConnectedCall(ctx: ConnectedCallContext, callId: CallId): ConnectedCallLookup {
76
+ const call = ctx.activeCalls.get(callId);
77
+ if (!call) {
78
+ return { kind: "error", error: "Call not found" };
79
+ }
80
+ if (!ctx.provider || !call.providerCallId) {
81
+ return { kind: "error", error: "Call not connected" };
82
+ }
83
+ if (TerminalStates.has(call.state)) {
84
+ return { kind: "ended", call };
85
+ }
86
+ return { kind: "ok", call, providerCallId: call.providerCallId, provider: ctx.provider };
87
+ }
88
+
89
+ function requireConnectedCall(ctx: ConnectedCallContext, callId: CallId): ConnectedCallResolution {
90
+ const lookup = lookupConnectedCall(ctx, callId);
91
+ if (lookup.kind === "error") {
92
+ return { ok: false, error: lookup.error };
93
+ }
94
+ if (lookup.kind === "ended") {
95
+ return { ok: false, error: "Call has ended" };
96
+ }
97
+ return {
98
+ ok: true,
99
+ call: lookup.call,
100
+ providerCallId: lookup.providerCallId,
101
+ provider: lookup.provider,
102
+ };
103
+ }
104
+
54
105
  export async function initiateCall(
55
106
  ctx: InitiateContext,
56
107
  to: string,
@@ -149,26 +200,22 @@ export async function speak(
149
200
  callId: CallId,
150
201
  text: string,
151
202
  ): Promise<{ success: boolean; error?: string }> {
152
- const call = ctx.activeCalls.get(callId);
153
- if (!call) {
154
- return { success: false, error: "Call not found" };
155
- }
156
- if (!ctx.provider || !call.providerCallId) {
157
- return { success: false, error: "Call not connected" };
158
- }
159
- if (TerminalStates.has(call.state)) {
160
- return { success: false, error: "Call has ended" };
203
+ const connected = requireConnectedCall(ctx, callId);
204
+ if (!connected.ok) {
205
+ return { success: false, error: connected.error };
161
206
  }
207
+ const { call, providerCallId, provider } = connected;
208
+
162
209
  try {
163
210
  transitionState(call, "speaking");
164
211
  persistCallRecord(ctx.storePath, call);
165
212
 
166
213
  addTranscriptEntry(call, "bot", text);
167
214
 
168
- const voice = ctx.provider?.name === "twilio" ? ctx.config.tts?.openai?.voice : undefined;
169
- await ctx.provider.playTts({
215
+ const voice = provider.name === "twilio" ? ctx.config.tts?.openai?.voice : undefined;
216
+ await provider.playTts({
170
217
  callId,
171
- providerCallId: call.providerCallId,
218
+ providerCallId,
172
219
  text,
173
220
  voice,
174
221
  });
@@ -232,22 +279,19 @@ export async function continueCall(
232
279
  callId: CallId,
233
280
  prompt: string,
234
281
  ): Promise<{ success: boolean; transcript?: string; error?: string }> {
235
- const call = ctx.activeCalls.get(callId);
236
- if (!call) {
237
- return { success: false, error: "Call not found" };
238
- }
239
- if (!ctx.provider || !call.providerCallId) {
240
- return { success: false, error: "Call not connected" };
241
- }
242
- if (TerminalStates.has(call.state)) {
243
- return { success: false, error: "Call has ended" };
282
+ const connected = requireConnectedCall(ctx, callId);
283
+ if (!connected.ok) {
284
+ return { success: false, error: connected.error };
244
285
  }
286
+ const { call, providerCallId, provider } = connected;
287
+
245
288
  if (ctx.activeTurnCalls.has(callId) || ctx.transcriptWaiters.has(callId)) {
246
289
  return { success: false, error: "Already waiting for transcript" };
247
290
  }
248
291
  ctx.activeTurnCalls.add(callId);
249
292
 
250
293
  const turnStartedAt = Date.now();
294
+ const turnToken = provider.name === "twilio" ? crypto.randomUUID() : undefined;
251
295
 
252
296
  try {
253
297
  await speak(ctx, callId, prompt);
@@ -256,13 +300,13 @@ export async function continueCall(
256
300
  persistCallRecord(ctx.storePath, call);
257
301
 
258
302
  const listenStartedAt = Date.now();
259
- await ctx.provider.startListening({ callId, providerCallId: call.providerCallId });
303
+ await provider.startListening({ callId, providerCallId, turnToken });
260
304
 
261
- const transcript = await waitForFinalTranscript(ctx, callId);
305
+ const transcript = await waitForFinalTranscript(ctx, callId, turnToken);
262
306
  const transcriptReceivedAt = Date.now();
263
307
 
264
308
  // Best-effort: stop listening after final transcript.
265
- await ctx.provider.stopListening({ callId, providerCallId: call.providerCallId });
309
+ await provider.stopListening({ callId, providerCallId });
266
310
 
267
311
  const lastTurnLatencyMs = transcriptReceivedAt - turnStartedAt;
268
312
  const lastTurnListenWaitMs = transcriptReceivedAt - listenStartedAt;
@@ -302,21 +346,19 @@ export async function endCall(
302
346
  ctx: EndCallContext,
303
347
  callId: CallId,
304
348
  ): Promise<{ success: boolean; error?: string }> {
305
- const call = ctx.activeCalls.get(callId);
306
- if (!call) {
307
- return { success: false, error: "Call not found" };
349
+ const lookup = lookupConnectedCall(ctx, callId);
350
+ if (lookup.kind === "error") {
351
+ return { success: false, error: lookup.error };
308
352
  }
309
- if (!ctx.provider || !call.providerCallId) {
310
- return { success: false, error: "Call not connected" };
311
- }
312
- if (TerminalStates.has(call.state)) {
353
+ if (lookup.kind === "ended") {
313
354
  return { success: true };
314
355
  }
356
+ const { call, providerCallId, provider } = lookup;
315
357
 
316
358
  try {
317
- await ctx.provider.hangupCall({
359
+ await provider.hangupCall({
318
360
  callId,
319
- providerCallId: call.providerCallId,
361
+ providerCallId,
320
362
  reason: "hangup-bot",
321
363
  });
322
364
 
@@ -329,9 +371,7 @@ export async function endCall(
329
371
  rejectTranscriptWaiter(ctx, callId, "Call ended: hangup-bot");
330
372
 
331
373
  ctx.activeCalls.delete(callId);
332
- if (call.providerCallId) {
333
- ctx.providerCallIdMap.delete(call.providerCallId);
334
- }
374
+ ctx.providerCallIdMap.delete(providerCallId);
335
375
 
336
376
  return { success: true };
337
377
  } catch (err) {
@@ -77,16 +77,25 @@ export function resolveTranscriptWaiter(
77
77
  ctx: TranscriptWaiterContext,
78
78
  callId: CallId,
79
79
  transcript: string,
80
- ): void {
80
+ turnToken?: string,
81
+ ): boolean {
81
82
  const waiter = ctx.transcriptWaiters.get(callId);
82
83
  if (!waiter) {
83
- return;
84
+ return false;
85
+ }
86
+ if (waiter.turnToken && waiter.turnToken !== turnToken) {
87
+ return false;
84
88
  }
85
89
  clearTranscriptWaiter(ctx, callId);
86
90
  waiter.resolve(transcript);
91
+ return true;
87
92
  }
88
93
 
89
- export function waitForFinalTranscript(ctx: TimerContext, callId: CallId): Promise<string> {
94
+ export function waitForFinalTranscript(
95
+ ctx: TimerContext,
96
+ callId: CallId,
97
+ turnToken?: string,
98
+ ): Promise<string> {
90
99
  if (ctx.transcriptWaiters.has(callId)) {
91
100
  return Promise.reject(new Error("Already waiting for transcript"));
92
101
  }
@@ -98,6 +107,6 @@ export function waitForFinalTranscript(ctx: TimerContext, callId: CallId): Promi
98
107
  reject(new Error(`Timed out waiting for transcript after ${timeoutMs}ms`));
99
108
  }, timeoutMs);
100
109
 
101
- ctx.transcriptWaiters.set(callId, { resolve, reject, timeout });
110
+ ctx.transcriptWaiters.set(callId, { resolve, reject, timeout, turnToken });
102
111
  });
103
112
  }