@openclaw/voice-call 2026.5.1-beta.1 → 2026.5.2-beta.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -40,6 +40,7 @@ Put under `plugins.entries.voice-call.config`:
40
40
  provider: "twilio", // or "telnyx" | "plivo" | "mock"
41
41
  fromNumber: "+15550001234",
42
42
  toNumber: "+15550005678",
43
+ sessionScope: "per-phone", // or "per-call"
43
44
 
44
45
  twilio: {
45
46
  accountSid: "ACxxxxxxxx",
@@ -104,6 +105,7 @@ Notes:
104
105
  - If older configs still use `provider: "log"`, `twilio.from`, or legacy `streaming.*` OpenAI keys, run `openclaw doctor --fix` to rewrite them.
105
106
  - advanced webhook, streaming, and tunnel notes: `https://docs.openclaw.ai/plugins/voice-call`
106
107
  - `responseModel` is optional. When unset, voice responses use the runtime default model.
108
+ - `sessionScope` defaults to `per-phone`, preserving caller memory across calls. Use `per-call` for reception, booking, IVR, and bridge flows where each carrier call should start fresh.
107
109
 
108
110
  ## Stale call reaper
109
111
 
package/index.test.ts CHANGED
@@ -6,6 +6,7 @@ import { createTestPluginApi } from "openclaw/plugin-sdk/plugin-test-api";
6
6
  import { afterEach, beforeEach, describe, expect, it, vi } from "vitest";
7
7
  import type { OpenClawPluginApi } from "./api.js";
8
8
  import type { VoiceCallRuntime } from "./runtime-entry.js";
9
+ import type { CallRecord } from "./src/types.js";
9
10
 
10
11
  let runtimeStub: VoiceCallRuntime;
11
12
 
@@ -52,8 +53,12 @@ function captureStdout() {
52
53
  }
53
54
 
54
55
  function createRuntimeStub(callId = "call-1"): VoiceCallRuntime {
56
+ const call = createCallRecord({ callId });
55
57
  return {
56
- config: { toNumber: "+15550001234" } as VoiceCallRuntime["config"],
58
+ config: {
59
+ toNumber: "+15550001234",
60
+ realtime: { enabled: false },
61
+ } as VoiceCallRuntime["config"],
57
62
  provider: {} as VoiceCallRuntime["provider"],
58
63
  manager: {
59
64
  initiateCall: vi.fn(async () => ({ callId, success: true })),
@@ -64,17 +69,35 @@ function createRuntimeStub(callId = "call-1"): VoiceCallRuntime {
64
69
  speak: vi.fn(async () => ({ success: true })),
65
70
  sendDtmf: vi.fn(async () => ({ success: true })),
66
71
  endCall: vi.fn(async () => ({ success: true })),
67
- getCall: vi.fn((id: string) => (id === callId ? { callId } : undefined)),
72
+ getCall: vi.fn((id: string) => (id === callId ? call : undefined)),
68
73
  getCallByProviderCallId: vi.fn(() => undefined),
69
- getActiveCalls: vi.fn(() => [{ callId }]),
74
+ getActiveCalls: vi.fn(() => [call]),
75
+ getCallHistory: vi.fn(async () => []),
70
76
  } as unknown as VoiceCallRuntime["manager"],
71
- webhookServer: {} as VoiceCallRuntime["webhookServer"],
77
+ webhookServer: {
78
+ speakRealtime: vi.fn(() => ({ success: false, error: "No active realtime bridge for call" })),
79
+ } as unknown as VoiceCallRuntime["webhookServer"],
72
80
  webhookUrl: "http://127.0.0.1:3334/voice/webhook",
73
81
  publicUrl: null,
74
82
  stop: vi.fn(async () => {}),
75
83
  };
76
84
  }
77
85
 
86
+ function createCallRecord(overrides: Partial<CallRecord> = {}): CallRecord {
87
+ return {
88
+ callId: "call-1",
89
+ provider: "mock",
90
+ direction: "outbound",
91
+ state: "active",
92
+ from: "+15550001111",
93
+ to: "+15550001234",
94
+ startedAt: Date.UTC(2026, 4, 2, 9, 0, 0),
95
+ transcript: [],
96
+ processedEventIds: [],
97
+ ...overrides,
98
+ };
99
+ }
100
+
78
101
  function createServiceContext(): Parameters<NonNullable<Registered["service"]>["start"]>[0] {
79
102
  return {
80
103
  config: {},
@@ -397,6 +420,60 @@ describe("voice-call plugin", () => {
397
420
  expect(respond.mock.calls[0]).toEqual([true, { success: true }]);
398
421
  });
399
422
 
423
+ it("normalizes provider call ids before speaking", async () => {
424
+ runtimeStub.manager.getCall = vi.fn(() => undefined);
425
+ runtimeStub.manager.getCallByProviderCallId = vi.fn(() =>
426
+ createCallRecord({
427
+ callId: "call-1",
428
+ providerCallId: "CA123",
429
+ }),
430
+ );
431
+ const { methods } = setup({ provider: "mock" });
432
+ const handler = methods.get("voicecall.speak") as
433
+ | ((ctx: {
434
+ params: Record<string, unknown>;
435
+ respond: ReturnType<typeof vi.fn>;
436
+ }) => Promise<void>)
437
+ | undefined;
438
+ const respond = vi.fn();
439
+
440
+ await handler?.({ params: { callId: "CA123", message: "hello" }, respond });
441
+
442
+ expect(runtimeStub.manager.speak).toHaveBeenCalledWith("call-1", "hello");
443
+ expect(respond.mock.calls[0]).toEqual([true, { success: true }]);
444
+ });
445
+
446
+ it("reports ended call history when speaking to a stale call", async () => {
447
+ runtimeStub.manager.getCall = vi.fn(() => undefined);
448
+ runtimeStub.manager.getCallByProviderCallId = vi.fn(() => undefined);
449
+ runtimeStub.manager.getCallHistory = vi.fn(async () => [
450
+ createCallRecord({
451
+ callId: "call-1",
452
+ providerCallId: "CA123",
453
+ state: "completed",
454
+ endReason: "completed",
455
+ endedAt: Date.UTC(2026, 4, 2, 9, 18, 23),
456
+ }),
457
+ ]);
458
+ const { methods } = setup({ provider: "mock" });
459
+ const handler = methods.get("voicecall.speak") as
460
+ | ((ctx: {
461
+ params: Record<string, unknown>;
462
+ respond: ReturnType<typeof vi.fn>;
463
+ }) => Promise<void>)
464
+ | undefined;
465
+ const respond = vi.fn();
466
+
467
+ await handler?.({ params: { callId: "CA123", message: "hello" }, respond });
468
+
469
+ const [ok, , error] = respond.mock.calls[0] ?? [];
470
+ expect(ok).toBe(false);
471
+ expect(error.message).toContain("call is not active");
472
+ expect(error.message).toContain("last state=completed");
473
+ expect(error.message).toContain("endReason=completed");
474
+ expect(runtimeStub.manager.speak).not.toHaveBeenCalled();
475
+ });
476
+
400
477
  it("normalizes legacy config through runtime creation and warns to run doctor", async () => {
401
478
  const { methods } = setup({
402
479
  enabled: true,
package/index.ts CHANGED
@@ -42,6 +42,11 @@ const voiceCallConfigSchema = {
42
42
  inboundPolicy: { label: "Inbound Policy" },
43
43
  allowFrom: { label: "Inbound Allowlist" },
44
44
  inboundGreeting: { label: "Inbound Greeting", advanced: true },
45
+ numbers: {
46
+ label: "Per-number Routing",
47
+ help: "Inbound overrides keyed by dialed E.164 number.",
48
+ advanced: true,
49
+ },
45
50
  "telnyx.apiKey": { label: "Telnyx API Key", sensitive: true },
46
51
  "telnyx.connectionId": { label: "Telnyx Connection ID" },
47
52
  "telnyx.publicKey": { label: "Telnyx Public Key", sensitive: true },
@@ -297,6 +302,22 @@ export default definePluginEntry({
297
302
  respondError(respond, formatErrorMessage(err));
298
303
  };
299
304
 
305
+ const describeHistoricalCall = async (rt: VoiceCallRuntime, callId: string) => {
306
+ const history = await rt.manager.getCallHistory(100);
307
+ const call = history
308
+ .toReversed()
309
+ .find((candidate) => candidate.callId === callId || candidate.providerCallId === callId);
310
+ if (!call) {
311
+ return undefined;
312
+ }
313
+ const details = [
314
+ `last state=${call.state}`,
315
+ call.endReason ? `endReason=${call.endReason}` : undefined,
316
+ call.endedAt ? `endedAt=${new Date(call.endedAt).toISOString()}` : undefined,
317
+ ].filter(Boolean);
318
+ return `call is not active (${details.join(", ")})`;
319
+ };
320
+
300
321
  const resolveCallMessageRequest = async (params: GatewayRequestHandlerOptions["params"]) => {
301
322
  const callId = normalizeOptionalString(params?.callId) ?? "";
302
323
  const message = normalizeOptionalString(params?.message) ?? "";
@@ -304,7 +325,11 @@ export default definePluginEntry({
304
325
  return { error: "callId and message required" } as const;
305
326
  }
306
327
  const rt = await ensureRuntime();
307
- return { rt, callId, message } as const;
328
+ const activeCall = rt.manager.getCall(callId) ?? rt.manager.getCallByProviderCallId(callId);
329
+ if (activeCall) {
330
+ return { rt, callId: activeCall.callId, message } as const;
331
+ }
332
+ return { error: (await describeHistoricalCall(rt, callId)) ?? "Call not found" } as const;
308
333
  };
309
334
 
310
335
  const initiateCallAndRespond = async (params: {
@@ -5,6 +5,9 @@
5
5
  "onStartup": true,
6
6
  "onCommands": ["voicecall"]
7
7
  },
8
+ "contracts": {
9
+ "tools": ["voice_call"]
10
+ },
8
11
  "channelEnvVars": {
9
12
  "voice-call": [
10
13
  "TELNYX_API_KEY",
@@ -42,6 +45,11 @@
42
45
  "label": "Inbound Greeting",
43
46
  "advanced": true
44
47
  },
48
+ "numbers": {
49
+ "label": "Per-number Routing",
50
+ "help": "Inbound overrides keyed by dialed E.164 number.",
51
+ "advanced": true
52
+ },
45
53
  "telnyx.apiKey": {
46
54
  "label": "Telnyx API Key",
47
55
  "sensitive": true
@@ -186,6 +194,10 @@
186
194
  "label": "Call Log Store Path",
187
195
  "advanced": true
188
196
  },
197
+ "sessionScope": {
198
+ "label": "Session Scope",
199
+ "help": "Use per-phone to preserve caller memory across calls, or per-call to isolate every call into a fresh voice session."
200
+ },
189
201
  "responseModel": {
190
202
  "label": "Response Model",
191
203
  "help": "Optional override. Falls back to the runtime default model when unset.",
@@ -272,6 +284,38 @@
272
284
  "inboundGreeting": {
273
285
  "type": "string"
274
286
  },
287
+ "numbers": {
288
+ "type": "object",
289
+ "propertyNames": {
290
+ "pattern": "^\\+[1-9]\\d{1,14}$"
291
+ },
292
+ "additionalProperties": {
293
+ "type": "object",
294
+ "additionalProperties": false,
295
+ "properties": {
296
+ "inboundGreeting": {
297
+ "type": "string"
298
+ },
299
+ "tts": {
300
+ "$ref": "#/properties/tts"
301
+ },
302
+ "agentId": {
303
+ "type": "string",
304
+ "minLength": 1
305
+ },
306
+ "responseModel": {
307
+ "type": "string"
308
+ },
309
+ "responseSystemPrompt": {
310
+ "type": "string"
311
+ },
312
+ "responseTimeoutMs": {
313
+ "type": "integer",
314
+ "minimum": 1
315
+ }
316
+ }
317
+ }
318
+ },
275
319
  "outbound": {
276
320
  "type": "object",
277
321
  "additionalProperties": false,
@@ -764,6 +808,10 @@
764
808
  "store": {
765
809
  "type": "string"
766
810
  },
811
+ "sessionScope": {
812
+ "type": "string",
813
+ "enum": ["per-phone", "per-call"]
814
+ },
767
815
  "responseModel": {
768
816
  "type": "string"
769
817
  },
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@openclaw/voice-call",
3
- "version": "2026.5.1-beta.1",
3
+ "version": "2026.5.2-beta.2",
4
4
  "description": "OpenClaw voice-call plugin",
5
5
  "repository": {
6
6
  "type": "git",
@@ -17,7 +17,7 @@
17
17
  "openclaw": "workspace:*"
18
18
  },
19
19
  "peerDependencies": {
20
- "openclaw": ">=2026.4.25"
20
+ "openclaw": ">=2026.5.2-beta.2"
21
21
  },
22
22
  "peerDependenciesMeta": {
23
23
  "openclaw": {
@@ -29,13 +29,15 @@
29
29
  "./index.ts"
30
30
  ],
31
31
  "install": {
32
+ "npmSpec": "@openclaw/voice-call",
33
+ "defaultChoice": "npm",
32
34
  "minHostVersion": ">=2026.4.10"
33
35
  },
34
36
  "compat": {
35
- "pluginApi": ">=2026.4.25"
37
+ "pluginApi": ">=2026.5.2-beta.2"
36
38
  },
37
39
  "build": {
38
- "openclawVersion": "2026.5.1-beta.1"
40
+ "openclawVersion": "2026.5.2-beta.2"
39
41
  },
40
42
  "release": {
41
43
  "publishToClawHub": true,
@@ -2,6 +2,9 @@ import { afterEach, beforeEach, describe, expect, it } from "vitest";
2
2
  import {
3
3
  VoiceCallConfigSchema,
4
4
  resolveTwilioAuthToken,
5
+ resolveVoiceCallEffectiveConfig,
6
+ resolveVoiceCallNumberRouteKey,
7
+ resolveVoiceCallSessionKey,
5
8
  validateProviderConfig,
6
9
  normalizeVoiceCallConfig,
7
10
  resolveVoiceCallConfig,
@@ -256,6 +259,116 @@ describe("resolveVoiceCallConfig", () => {
256
259
 
257
260
  expect(config.staleCallReaperSeconds).toBe(120);
258
261
  });
262
+
263
+ it("keeps voice sessions scoped by phone by default", () => {
264
+ const config = resolveVoiceCallConfig({ enabled: true, provider: "mock" });
265
+
266
+ expect(config.sessionScope).toBe("per-phone");
267
+ expect(
268
+ resolveVoiceCallSessionKey({
269
+ config,
270
+ callId: "call-123",
271
+ phone: "+1 (555) 000-1111",
272
+ }),
273
+ ).toBe("voice:15550001111");
274
+ });
275
+
276
+ it("can scope voice sessions to each call", () => {
277
+ const config = resolveVoiceCallConfig({
278
+ enabled: true,
279
+ provider: "mock",
280
+ sessionScope: "per-call",
281
+ });
282
+
283
+ expect(config.sessionScope).toBe("per-call");
284
+ expect(
285
+ resolveVoiceCallSessionKey({
286
+ config,
287
+ callId: "call-123",
288
+ phone: "+1 (555) 000-1111",
289
+ }),
290
+ ).toBe("voice:call:call-123");
291
+ });
292
+
293
+ it("preserves explicit voice session keys", () => {
294
+ const config = resolveVoiceCallConfig({
295
+ enabled: true,
296
+ provider: "mock",
297
+ sessionScope: "per-call",
298
+ });
299
+
300
+ expect(
301
+ resolveVoiceCallSessionKey({
302
+ config,
303
+ callId: "call-123",
304
+ phone: "+1 (555) 000-1111",
305
+ explicitSessionKey: "meet-room-1",
306
+ }),
307
+ ).toBe("meet-room-1");
308
+ });
309
+
310
+ it("resolves per-number inbound route overrides over global voice settings", () => {
311
+ const config = resolveVoiceCallConfig({
312
+ enabled: true,
313
+ provider: "mock",
314
+ inboundGreeting: "Hello from global.",
315
+ agentId: "main",
316
+ responseModel: "openai/gpt-5.4-mini",
317
+ responseSystemPrompt: "Global voice assistant.",
318
+ responseTimeoutMs: 10000,
319
+ tts: {
320
+ provider: "openai",
321
+ providers: {
322
+ openai: { voice: "coral", speed: 1 },
323
+ },
324
+ },
325
+ numbers: {
326
+ "+15550001111": {
327
+ inboundGreeting: "Silver Fox Cards, how can I help?",
328
+ agentId: "cards",
329
+ responseModel: "openai/gpt-5.5",
330
+ responseSystemPrompt: "You are a baseball card expert.",
331
+ responseTimeoutMs: 20000,
332
+ tts: {
333
+ providers: {
334
+ openai: { voice: "alloy" },
335
+ },
336
+ },
337
+ },
338
+ },
339
+ });
340
+
341
+ expect(resolveVoiceCallNumberRouteKey(config, "+1 (555) 000-1111")).toBe("+15550001111");
342
+ const effective = resolveVoiceCallEffectiveConfig(config, "+1 (555) 000-1111");
343
+
344
+ expect(effective.numberRouteKey).toBe("+15550001111");
345
+ expect(effective.config.inboundGreeting).toBe("Silver Fox Cards, how can I help?");
346
+ expect(effective.config.agentId).toBe("cards");
347
+ expect(effective.config.responseModel).toBe("openai/gpt-5.5");
348
+ expect(effective.config.responseSystemPrompt).toBe("You are a baseball card expert.");
349
+ expect(effective.config.responseTimeoutMs).toBe(20000);
350
+ expect(effective.config.tts?.provider).toBe("openai");
351
+ expect(effective.config.tts?.providers?.openai).toEqual({ voice: "alloy", speed: 1 });
352
+ });
353
+
354
+ it("falls back to global voice settings when no per-number route matches", () => {
355
+ const config = resolveVoiceCallConfig({
356
+ enabled: true,
357
+ provider: "mock",
358
+ inboundGreeting: "Hello from global.",
359
+ numbers: {
360
+ "+15550001111": {
361
+ inboundGreeting: "Hello from route.",
362
+ },
363
+ },
364
+ });
365
+
366
+ const effective = resolveVoiceCallEffectiveConfig(config, "+15550002222");
367
+
368
+ expect(effective.numberRouteKey).toBeUndefined();
369
+ expect(effective.config).toBe(config);
370
+ expect(effective.config.inboundGreeting).toBe("Hello from global.");
371
+ });
259
372
  });
260
373
 
261
374
  describe("normalizeVoiceCallConfig", () => {
package/src/config.ts CHANGED
@@ -74,6 +74,24 @@ export type PlivoConfig = z.infer<typeof PlivoConfigSchema>;
74
74
 
75
75
  export type VoiceCallTtsConfig = z.infer<typeof TtsConfigSchema>;
76
76
 
77
+ const VoiceCallNumberRouteConfigSchema = z
78
+ .object({
79
+ /** Greeting message for inbound calls to this number. */
80
+ inboundGreeting: z.string().optional(),
81
+ /** TTS override for inbound calls to this number. Deep-merges with global voice-call TTS. */
82
+ tts: TtsConfigSchema,
83
+ /** Agent ID to use for voice response generation for this number. */
84
+ agentId: z.string().min(1).optional(),
85
+ /** Optional model override for voice responses for this number. */
86
+ responseModel: z.string().optional(),
87
+ /** System prompt for voice responses for this number. */
88
+ responseSystemPrompt: z.string().optional(),
89
+ /** Timeout for response generation in ms for this number. */
90
+ responseTimeoutMs: z.number().int().positive().optional(),
91
+ })
92
+ .strict();
93
+ export type VoiceCallNumberRouteConfig = z.infer<typeof VoiceCallNumberRouteConfigSchema>;
94
+
77
95
  // -----------------------------------------------------------------------------
78
96
  // Webhook Server Configuration
79
97
  // -----------------------------------------------------------------------------
@@ -173,6 +191,9 @@ export type WebhookSecurityConfig = z.infer<typeof VoiceCallWebhookSecurityConfi
173
191
  const CallModeSchema = z.enum(["notify", "conversation"]);
174
192
  export type CallMode = z.infer<typeof CallModeSchema>;
175
193
 
194
+ const VoiceCallSessionScopeSchema = z.enum(["per-phone", "per-call"]);
195
+ export type VoiceCallSessionScope = z.infer<typeof VoiceCallSessionScopeSchema>;
196
+
176
197
  const OutboundConfigSchema = z
177
198
  .object({
178
199
  /** Default call mode for outbound calls */
@@ -350,6 +371,9 @@ export const VoiceCallConfigSchema = z
350
371
  /** Greeting message for inbound calls */
351
372
  inboundGreeting: z.string().optional(),
352
373
 
374
+ /** Per-dialed-number overrides for inbound calls. Keys are E.164 numbers. */
375
+ numbers: z.record(E164Schema, VoiceCallNumberRouteConfigSchema).default({}),
376
+
353
377
  /** Outbound call configuration */
354
378
  outbound: OutboundConfigSchema,
355
379
 
@@ -393,6 +417,9 @@ export const VoiceCallConfigSchema = z
393
417
  /** Realtime voice-to-voice configuration */
394
418
  realtime: VoiceCallRealtimeConfigSchema,
395
419
 
420
+ /** Session memory scope for voice conversations. */
421
+ sessionScope: VoiceCallSessionScopeSchema.default("per-phone"),
422
+
396
423
  /** Public webhook URL override (if set, bypasses tunnel auto-detection) */
397
424
  publicUrl: z.string().url().optional(),
398
425
 
@@ -420,6 +447,10 @@ export const VoiceCallConfigSchema = z
420
447
  .strict();
421
448
 
422
449
  export type VoiceCallConfig = z.infer<typeof VoiceCallConfigSchema>;
450
+ export type VoiceCallEffectiveConfigResult = {
451
+ config: VoiceCallConfig;
452
+ numberRouteKey?: string;
453
+ };
423
454
  type DeepPartial<T> = T extends SecretInput
424
455
  ? T
425
456
  : T extends Array<infer U>
@@ -474,6 +505,56 @@ function normalizeVoiceCallTtsConfig(
474
505
  return TtsConfigSchema.parse(deepMergeDefined(defaults ?? {}, overrides ?? {}));
475
506
  }
476
507
 
508
+ function normalizePhoneRouteKey(phone: string | undefined): string {
509
+ return phone?.replace(/\D/g, "") ?? "";
510
+ }
511
+
512
+ export function resolveVoiceCallNumberRouteKey(
513
+ config: Pick<VoiceCallConfig, "numbers">,
514
+ phone: string | undefined,
515
+ ): string | undefined {
516
+ const routes = config.numbers;
517
+ if (!routes) {
518
+ return undefined;
519
+ }
520
+ if (phone && Object.prototype.hasOwnProperty.call(routes, phone)) {
521
+ return phone;
522
+ }
523
+
524
+ const normalizedPhone = normalizePhoneRouteKey(phone);
525
+ if (!normalizedPhone) {
526
+ return undefined;
527
+ }
528
+ return Object.keys(routes).find(
529
+ (routeKey) => normalizePhoneRouteKey(routeKey) === normalizedPhone,
530
+ );
531
+ }
532
+
533
+ export function resolveVoiceCallEffectiveConfig(
534
+ config: VoiceCallConfig,
535
+ phoneOrRouteKey: string | undefined,
536
+ ): VoiceCallEffectiveConfigResult {
537
+ const numberRouteKey = resolveVoiceCallNumberRouteKey(config, phoneOrRouteKey);
538
+ if (!numberRouteKey) {
539
+ return { config };
540
+ }
541
+
542
+ const route = config.numbers[numberRouteKey];
543
+ if (!route) {
544
+ return { config };
545
+ }
546
+
547
+ return {
548
+ numberRouteKey,
549
+ config: {
550
+ ...config,
551
+ ...route,
552
+ tts: normalizeVoiceCallTtsConfig(config.tts, route.tts),
553
+ numbers: config.numbers,
554
+ },
555
+ };
556
+ }
557
+
477
558
  function sanitizeVoiceCallProviderConfigs(
478
559
  value: Record<string, Record<string, unknown> | undefined> | undefined,
479
560
  ): Record<string, Record<string, unknown>> {
@@ -487,6 +568,19 @@ function sanitizeVoiceCallProviderConfigs(
487
568
  );
488
569
  }
489
570
 
571
+ function sanitizeVoiceCallNumberRoutes(
572
+ value: Record<string, unknown> | undefined,
573
+ ): Record<string, VoiceCallNumberRouteConfig> {
574
+ if (!value) {
575
+ return {};
576
+ }
577
+ return Object.fromEntries(
578
+ Object.entries(value)
579
+ .filter((entry): entry is [string, unknown] => entry[1] !== undefined)
580
+ .map(([key, route]) => [key, VoiceCallNumberRouteConfigSchema.parse(route)]),
581
+ );
582
+ }
583
+
490
584
  export function resolveTwilioAuthToken(
491
585
  config: Pick<VoiceCallConfig, "twilio">,
492
586
  ): string | undefined {
@@ -516,6 +610,9 @@ export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCal
516
610
  ...defaults,
517
611
  ...config,
518
612
  allowFrom: config.allowFrom ?? defaults.allowFrom,
613
+ numbers: sanitizeVoiceCallNumberRoutes(
614
+ (config.numbers ?? defaults.numbers) as Record<string, unknown>,
615
+ ),
519
616
  outbound: { ...defaults.outbound, ...config.outbound },
520
617
  serve,
521
618
  tailscale: { ...defaults.tailscale, ...config.tailscale },
@@ -549,6 +646,23 @@ export function normalizeVoiceCallConfig(config: VoiceCallConfigInput): VoiceCal
549
646
  };
550
647
  }
551
648
 
649
+ export function resolveVoiceCallSessionKey(params: {
650
+ config: Pick<VoiceCallConfig, "sessionScope">;
651
+ callId: string;
652
+ phone?: string;
653
+ explicitSessionKey?: string;
654
+ }): string {
655
+ const explicit = params.explicitSessionKey?.trim();
656
+ if (explicit) {
657
+ return explicit;
658
+ }
659
+ if (params.config.sessionScope === "per-call") {
660
+ return `voice:call:${params.callId}`;
661
+ }
662
+ const normalizedPhone = params.phone?.replace(/\D/g, "");
663
+ return normalizedPhone ? `voice:${normalizedPhone}` : `voice:${params.callId}`;
664
+ }
665
+
552
666
  /**
553
667
  * Resolves the configuration by merging environment variables into missing fields.
554
668
  * Returns a new configuration object with environment variables applied.
@@ -426,6 +426,70 @@ describe("processEvent (functional)", () => {
426
426
  expect(call.direction).toBe("inbound");
427
427
  });
428
428
 
429
+ it("assigns per-call session keys to inbound calls when configured", () => {
430
+ const ctx = createContext({
431
+ config: VoiceCallConfigSchema.parse({
432
+ enabled: true,
433
+ provider: "plivo",
434
+ fromNumber: "+15550000000",
435
+ inboundPolicy: "open",
436
+ sessionScope: "per-call",
437
+ }),
438
+ });
439
+ const event: NormalizedEvent = {
440
+ id: "evt-inbound-session-scope",
441
+ type: "call.initiated",
442
+ callId: "CA-inbound-session-scope",
443
+ providerCallId: "CA-inbound-session-scope",
444
+ timestamp: Date.now(),
445
+ direction: "inbound",
446
+ from: "+15554444444",
447
+ to: "+15550000000",
448
+ };
449
+
450
+ processEvent(ctx, event);
451
+
452
+ const call = requireFirstActiveCall(ctx);
453
+ expect(call.sessionKey).toBe(`voice:call:${call.callId}`);
454
+ });
455
+
456
+ it("applies per-number inbound greeting and stores the matched route key", () => {
457
+ const ctx = createContext({
458
+ config: VoiceCallConfigSchema.parse({
459
+ enabled: true,
460
+ provider: "plivo",
461
+ fromNumber: "+15550000000",
462
+ inboundPolicy: "open",
463
+ inboundGreeting: "Hello from global.",
464
+ numbers: {
465
+ "+15550002222": {
466
+ inboundGreeting: "Silver Fox Cards, how can I help?",
467
+ },
468
+ },
469
+ }),
470
+ });
471
+ const event: NormalizedEvent = {
472
+ id: "evt-inbound-number-route",
473
+ type: "call.initiated",
474
+ callId: "CA-inbound-number-route",
475
+ providerCallId: "CA-inbound-number-route",
476
+ timestamp: Date.now(),
477
+ direction: "inbound",
478
+ from: "+15554444444",
479
+ to: "+1 (555) 000-2222",
480
+ };
481
+
482
+ processEvent(ctx, event);
483
+
484
+ const call = requireFirstActiveCall(ctx);
485
+ expect(call.metadata).toEqual(
486
+ expect.objectContaining({
487
+ initialMessage: "Silver Fox Cards, how can I help?",
488
+ numberRouteKey: "+15550002222",
489
+ }),
490
+ );
491
+ });
492
+
429
493
  it("deduplicates by dedupeKey even when event IDs differ", () => {
430
494
  const now = Date.now();
431
495
  const ctx = createContext();
@@ -1,6 +1,7 @@
1
1
  import crypto from "node:crypto";
2
2
  import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
3
3
  import { isAllowlistedCaller, normalizePhoneNumber } from "../allowlist.js";
4
+ import { resolveVoiceCallEffectiveConfig, resolveVoiceCallSessionKey } from "../config.js";
4
5
  import type { CallRecord, NormalizedEvent } from "../types.js";
5
6
  import type { CallManagerContext } from "./context.js";
6
7
  import { finalizeCall } from "./lifecycle.js";
@@ -64,6 +65,11 @@ function createWebhookCall(params: {
64
65
  to: string;
65
66
  }): CallRecord {
66
67
  const callId = crypto.randomUUID();
68
+ const effective = resolveVoiceCallEffectiveConfig(
69
+ params.ctx.config,
70
+ params.direction === "inbound" ? params.to : undefined,
71
+ );
72
+ const effectiveConfig = effective.config;
67
73
 
68
74
  const callRecord: CallRecord = {
69
75
  callId,
@@ -73,14 +79,20 @@ function createWebhookCall(params: {
73
79
  state: "ringing",
74
80
  from: params.from,
75
81
  to: params.to,
82
+ sessionKey: resolveVoiceCallSessionKey({
83
+ config: effectiveConfig,
84
+ callId,
85
+ phone: params.direction === "outbound" ? params.to : params.from,
86
+ }),
76
87
  startedAt: Date.now(),
77
88
  transcript: [],
78
89
  processedEventIds: [],
79
90
  metadata: {
80
91
  initialMessage:
81
92
  params.direction === "inbound"
82
- ? params.ctx.config.inboundGreeting || "Hello! How can I help you today?"
93
+ ? effectiveConfig.inboundGreeting || "Hello! How can I help you today?"
83
94
  : undefined,
95
+ ...(effective.numberRouteKey ? { numberRouteKey: effective.numberRouteKey } : {}),
84
96
  },
85
97
  };
86
98
 
@@ -170,9 +170,35 @@ describe("voice-call outbound helpers", () => {
170
170
  inlineTwiml: "<Response />",
171
171
  });
172
172
  expect(ctx.providerCallIdMap.get("provider-1")).toBe(callId);
173
+ expect(ctx.activeCalls.get(callId)?.sessionKey).toBe("session-1");
173
174
  expect(persistCallRecordMock).toHaveBeenCalledTimes(2);
174
175
  });
175
176
 
177
+ it("assigns per-call session keys to outbound calls when configured", async () => {
178
+ const initiateProviderCall = vi.fn(async () => ({ providerCallId: "provider-1" }));
179
+ const ctx = {
180
+ activeCalls: new Map(),
181
+ providerCallIdMap: new Map(),
182
+ provider: { name: "twilio", initiateCall: initiateProviderCall },
183
+ config: {
184
+ maxConcurrentCalls: 3,
185
+ outbound: { defaultMode: "conversation" },
186
+ fromNumber: "+14155550100",
187
+ sessionScope: "per-call",
188
+ },
189
+ storePath: "/tmp/voice-call.json",
190
+ webhookUrl: "https://example.com/webhook",
191
+ };
192
+
193
+ const result = await initiateCall(ctx as never, "+14155550123");
194
+
195
+ expect(result).toEqual({
196
+ callId: expect.any(String),
197
+ success: true,
198
+ });
199
+ expect(ctx.activeCalls.get(result.callId)?.sessionKey).toBe(`voice:call:${result.callId}`);
200
+ });
201
+
176
202
  it("initiates conversation calls with pre-connect DTMF TwiML", async () => {
177
203
  const initiateProviderCall = vi.fn(async () => ({ providerCallId: "provider-1" }));
178
204
  const ctx = {
@@ -335,6 +361,44 @@ describe("voice-call outbound helpers", () => {
335
361
  });
336
362
  });
337
363
 
364
+ it("uses per-number route TTS voice for routed inbound calls", async () => {
365
+ const call = {
366
+ callId: "call-1",
367
+ providerCallId: "provider-1",
368
+ state: "active",
369
+ to: "+15550002222",
370
+ metadata: { numberRouteKey: "+15550002222" },
371
+ };
372
+ const playTts = vi.fn(async () => {});
373
+ const ctx = {
374
+ activeCalls: new Map([["call-1", call]]),
375
+ providerCallIdMap: new Map(),
376
+ provider: { name: "twilio", playTts },
377
+ config: {
378
+ tts: { provider: "openai", providers: { openai: { voice: "coral" } } },
379
+ numbers: {
380
+ "+15550002222": {
381
+ tts: {
382
+ providers: {
383
+ openai: { voice: "alloy" },
384
+ },
385
+ },
386
+ },
387
+ },
388
+ },
389
+ storePath: "/tmp/voice-call.json",
390
+ };
391
+
392
+ await expect(speak(ctx as never, "call-1", "hello")).resolves.toEqual({ success: true });
393
+
394
+ expect(playTts).toHaveBeenCalledWith({
395
+ callId: "call-1",
396
+ providerCallId: "provider-1",
397
+ text: "hello",
398
+ voice: "alloy",
399
+ });
400
+ });
401
+
338
402
  it("sends DTMF through connected provider calls", async () => {
339
403
  const call = { callId: "call-1", providerCallId: "provider-1", state: "active" };
340
404
  const sendDtmfProvider = vi.fn(async () => {});
@@ -1,6 +1,10 @@
1
1
  import crypto from "node:crypto";
2
2
  import { formatErrorMessage } from "openclaw/plugin-sdk/error-runtime";
3
- import type { CallMode } from "../config.js";
3
+ import {
4
+ resolveVoiceCallEffectiveConfig,
5
+ resolveVoiceCallSessionKey,
6
+ type CallMode,
7
+ } from "../config.js";
4
8
  import { resolvePreferredTtsVoice } from "../tts-provider-voice.js";
5
9
  import {
6
10
  type EndReason,
@@ -162,7 +166,12 @@ export async function initiateCall(
162
166
  state: "initiated",
163
167
  from,
164
168
  to,
165
- sessionKey,
169
+ sessionKey: resolveVoiceCallSessionKey({
170
+ config: ctx.config,
171
+ callId,
172
+ phone: to,
173
+ explicitSessionKey: sessionKey,
174
+ }),
166
175
  startedAt: Date.now(),
167
176
  transcript: [],
168
177
  processedEventIds: [],
@@ -237,7 +246,11 @@ export async function speak(
237
246
  transitionState(call, "speaking");
238
247
  persistCallRecord(ctx.storePath, call);
239
248
 
240
- const voice = resolvePreferredTtsVoice(ctx.config);
249
+ const numberRouteKey =
250
+ typeof call.metadata?.numberRouteKey === "string" ? call.metadata.numberRouteKey : call.to;
251
+ const voice = resolvePreferredTtsVoice(
252
+ resolveVoiceCallEffectiveConfig(ctx.config, numberRouteKey).config,
253
+ );
241
254
  await provider.playTts({
242
255
  callId,
243
256
  providerCallId,
@@ -6,6 +6,14 @@ import { generateVoiceResponse } from "./response-generator.js";
6
6
  function createAgentRuntime(payloads: Array<Record<string, unknown>>) {
7
7
  const sessionStore: Record<string, { sessionId: string; updatedAt: number }> = {};
8
8
  const saveSessionStore = vi.fn(async () => {});
9
+ const updateSessionStore = vi.fn(
10
+ async (
11
+ _storePath: string,
12
+ mutator: (store: Record<string, { sessionId: string; updatedAt: number }>) => unknown,
13
+ ) => {
14
+ return await mutator(sessionStore);
15
+ },
16
+ );
9
17
  const runEmbeddedPiAgent = vi.fn(async () => ({
10
18
  payloads,
11
19
  meta: { durationMs: 12, aborted: false },
@@ -44,6 +52,7 @@ function createAgentRuntime(payloads: Array<Record<string, unknown>>) {
44
52
  resolveStorePath,
45
53
  loadSessionStore: () => sessionStore,
46
54
  saveSessionStore,
55
+ updateSessionStore,
47
56
  resolveSessionFilePath,
48
57
  },
49
58
  } as unknown as CoreAgentDeps;
@@ -52,6 +61,7 @@ function createAgentRuntime(payloads: Array<Record<string, unknown>>) {
52
61
  runtime,
53
62
  runEmbeddedPiAgent,
54
63
  saveSessionStore,
64
+ updateSessionStore,
55
65
  sessionStore,
56
66
  resolveAgentDir,
57
67
  resolveAgentWorkspaceDir,
@@ -157,7 +167,7 @@ describe("generateVoiceResponse", () => {
157
167
  });
158
168
 
159
169
  it("pins the voice session to responseModel before running the embedded agent", async () => {
160
- const { runtime, runEmbeddedPiAgent, saveSessionStore, sessionStore } = createAgentRuntime([
170
+ const { runtime, runEmbeddedPiAgent, updateSessionStore, sessionStore } = createAgentRuntime([
161
171
  { text: '{"spoken":"Pinned model works."}' },
162
172
  ]);
163
173
  const voiceConfig = VoiceCallConfigSchema.parse({
@@ -181,7 +191,10 @@ describe("generateVoiceResponse", () => {
181
191
  modelOverride: "gpt-4.1-nano",
182
192
  modelOverrideSource: "auto",
183
193
  });
184
- expect(saveSessionStore).toHaveBeenCalledWith("/tmp/openclaw/main/sessions.json", sessionStore);
194
+ expect(updateSessionStore).toHaveBeenCalledWith(
195
+ "/tmp/openclaw/main/sessions.json",
196
+ expect.any(Function),
197
+ );
185
198
  expect(runEmbeddedPiAgent).toHaveBeenCalledWith(
186
199
  expect.objectContaining({
187
200
  provider: "openai",
@@ -191,6 +204,37 @@ describe("generateVoiceResponse", () => {
191
204
  );
192
205
  });
193
206
 
207
+ it("uses the persisted per-call session key for classic responses", async () => {
208
+ const { runtime, runEmbeddedPiAgent, sessionStore } = createAgentRuntime([
209
+ { text: '{"spoken":"Fresh call context."}' },
210
+ ]);
211
+ const voiceConfig = VoiceCallConfigSchema.parse({
212
+ sessionScope: "per-call",
213
+ responseTimeoutMs: 5000,
214
+ });
215
+
216
+ const result = await generateVoiceResponse({
217
+ voiceConfig,
218
+ coreConfig: {} as CoreConfig,
219
+ agentRuntime: runtime,
220
+ callId: "call-123",
221
+ sessionKey: "voice:call:call-123",
222
+ from: "+15550001111",
223
+ transcript: [{ speaker: "user", text: "hello there" }],
224
+ userMessage: "hello there",
225
+ });
226
+
227
+ expect(result.text).toBe("Fresh call context.");
228
+ expect(sessionStore["voice:call:call-123"]).toBeDefined();
229
+ expect(sessionStore["voice:15550001111"]).toBeUndefined();
230
+ expect(runEmbeddedPiAgent).toHaveBeenCalledWith(
231
+ expect.objectContaining({
232
+ sessionKey: "voice:call:call-123",
233
+ sandboxSessionKey: "agent:main:voice:call:call-123",
234
+ }),
235
+ );
236
+ });
237
+
194
238
  it("uses the main agent workspace when voice config omits agentId", async () => {
195
239
  const {
196
240
  runtime,
@@ -7,7 +7,7 @@ import crypto from "node:crypto";
7
7
  import { applyModelOverrideToSessionEntry } from "openclaw/plugin-sdk/model-session-runtime";
8
8
  import { normalizeLowercaseStringOrEmpty } from "openclaw/plugin-sdk/text-runtime";
9
9
  import type { SessionEntry } from "../api.js";
10
- import type { VoiceCallConfig } from "./config.js";
10
+ import { resolveVoiceCallSessionKey, type VoiceCallConfig } from "./config.js";
11
11
  import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
12
12
  import { resolveVoiceResponseModel } from "./response-model.js";
13
13
 
@@ -20,6 +20,8 @@ export type VoiceResponseParams = {
20
20
  agentRuntime: CoreAgentDeps;
21
21
  /** Call ID for session tracking */
22
22
  callId: string;
23
+ /** Persisted call session key */
24
+ sessionKey?: string;
23
25
  /** Caller's phone number */
24
26
  from: string;
25
27
  /** Conversation transcript */
@@ -187,16 +189,28 @@ function resolveVoiceSandboxSessionKey(agentId: string, sessionKey: string): str
187
189
  export async function generateVoiceResponse(
188
190
  params: VoiceResponseParams,
189
191
  ): Promise<VoiceResponseResult> {
190
- const { voiceConfig, callId, from, transcript, userMessage, coreConfig, agentRuntime } = params;
192
+ const {
193
+ voiceConfig,
194
+ callId,
195
+ sessionKey,
196
+ from,
197
+ transcript,
198
+ userMessage,
199
+ coreConfig,
200
+ agentRuntime,
201
+ } = params;
191
202
 
192
203
  if (!coreConfig) {
193
204
  return { text: null, error: "Core config unavailable for voice response" };
194
205
  }
195
206
  const cfg = coreConfig;
196
207
 
197
- // Build voice-specific session key based on phone number
198
- const normalizedPhone = from.replace(/\D/g, "");
199
- const sessionKey = `voice:${normalizedPhone}`;
208
+ const resolvedSessionKey = resolveVoiceCallSessionKey({
209
+ config: voiceConfig,
210
+ callId,
211
+ phone: from,
212
+ explicitSessionKey: sessionKey,
213
+ });
200
214
  const agentId = voiceConfig.agentId ?? "main";
201
215
 
202
216
  // Resolve paths
@@ -210,34 +224,34 @@ export async function generateVoiceResponse(
210
224
  // Load or create session entry
211
225
  const sessionStore = agentRuntime.session.loadSessionStore(storePath);
212
226
  const now = Date.now();
213
- let sessionEntry = sessionStore[sessionKey] as SessionEntry | undefined;
214
- let sessionEntryUpdated = false;
215
-
216
- if (!sessionEntry) {
217
- sessionEntry = {
218
- sessionId: crypto.randomUUID(),
219
- updatedAt: now,
220
- };
221
- sessionStore[sessionKey] = sessionEntry;
222
- sessionEntryUpdated = true;
223
- }
224
-
225
- const sessionId = sessionEntry.sessionId;
227
+ const existingSessionEntry = sessionStore[resolvedSessionKey] as SessionEntry | undefined;
226
228
 
227
229
  // Resolve model from config
228
230
  const { provider, model } = resolveVoiceResponseModel({ voiceConfig, agentRuntime });
229
- if (voiceConfig.responseModel) {
230
- sessionEntryUpdated =
231
- applyModelOverrideToSessionEntry({
232
- entry: sessionEntry,
233
- selection: { provider, model },
234
- selectionSource: "auto",
235
- }).updated || sessionEntryUpdated;
236
- }
237
231
 
238
- if (sessionEntryUpdated) {
239
- await agentRuntime.session.saveSessionStore(storePath, sessionStore);
232
+ let sessionEntry = existingSessionEntry;
233
+ if (!sessionEntry?.sessionId || voiceConfig.responseModel) {
234
+ sessionEntry = await agentRuntime.session.updateSessionStore(storePath, (store) => {
235
+ let entry = store[resolvedSessionKey] as SessionEntry | undefined;
236
+ if (!entry?.sessionId) {
237
+ entry = {
238
+ ...entry,
239
+ sessionId: crypto.randomUUID(),
240
+ updatedAt: now,
241
+ };
242
+ store[resolvedSessionKey] = entry;
243
+ }
244
+ if (voiceConfig.responseModel) {
245
+ applyModelOverrideToSessionEntry({
246
+ entry,
247
+ selection: { provider, model },
248
+ selectionSource: "auto",
249
+ });
250
+ }
251
+ return entry;
252
+ });
240
253
  }
254
+ const sessionId = sessionEntry.sessionId;
241
255
 
242
256
  const sessionFile = agentRuntime.session.resolveSessionFilePath(sessionId, sessionEntry, {
243
257
  agentId,
@@ -271,8 +285,8 @@ export async function generateVoiceResponse(
271
285
  try {
272
286
  const result = await agentRuntime.runEmbeddedPiAgent({
273
287
  sessionId,
274
- sessionKey,
275
- sandboxSessionKey: resolveVoiceSandboxSessionKey(agentId, sessionKey),
288
+ sessionKey: resolvedSessionKey,
289
+ sandboxSessionKey: resolveVoiceSandboxSessionKey(agentId, resolvedSessionKey),
276
290
  agentId,
277
291
  messageProvider: "voice",
278
292
  sessionFile,
@@ -28,6 +28,23 @@ const mocks = vi.hoisted(() => ({
28
28
  }));
29
29
 
30
30
  vi.mock("./config.js", () => ({
31
+ resolveVoiceCallSessionKey: (params: {
32
+ config: Pick<VoiceCallConfig, "sessionScope">;
33
+ callId: string;
34
+ phone?: string;
35
+ explicitSessionKey?: string;
36
+ }) => {
37
+ const explicit = params.explicitSessionKey?.trim();
38
+ if (explicit) {
39
+ return explicit;
40
+ }
41
+ if (params.config.sessionScope === "per-call") {
42
+ return `voice:call:${params.callId}`;
43
+ }
44
+ const normalizedPhone = params.phone?.replace(/\D/g, "");
45
+ return normalizedPhone ? `voice:${normalizedPhone}` : `voice:${params.callId}`;
46
+ },
47
+ resolveVoiceCallEffectiveConfig: (config: VoiceCallConfig) => ({ config }),
31
48
  resolveVoiceCallConfig: mocks.resolveVoiceCallConfig,
32
49
  resolveTwilioAuthToken: mocks.resolveTwilioAuthToken,
33
50
  validateProviderConfig: mocks.validateProviderConfig,
@@ -320,6 +337,7 @@ describe("createVoiceCallRuntime lifecycle", () => {
320
337
  resolveStorePath: vi.fn(() => "/tmp/sessions.json"),
321
338
  loadSessionStore: vi.fn(() => sessionStore),
322
339
  saveSessionStore: vi.fn(async () => {}),
340
+ updateSessionStore: vi.fn(async (_storePath, mutator) => mutator(sessionStore as never)),
323
341
  resolveSessionFilePath: vi.fn(() => "/tmp/session.json"),
324
342
  },
325
343
  runEmbeddedPiAgent,
@@ -382,6 +400,65 @@ describe("createVoiceCallRuntime lifecycle", () => {
382
400
  );
383
401
  });
384
402
 
403
+ it("uses persisted per-call session keys for realtime consults", async () => {
404
+ const config = createBaseConfig();
405
+ config.inboundPolicy = "allowlist";
406
+ config.realtime.enabled = true;
407
+ config.sessionScope = "per-call";
408
+ const runEmbeddedPiAgent = vi.fn(async () => ({
409
+ payloads: [{ text: "Per-call consult answer." }],
410
+ meta: {},
411
+ }));
412
+ const sessionStore: Record<string, unknown> = {};
413
+ const agentRuntime = {
414
+ defaults: { provider: "openai", model: "gpt-5.4" },
415
+ resolveAgentDir: vi.fn(() => "/tmp/agent"),
416
+ resolveAgentWorkspaceDir: vi.fn(() => "/tmp/workspace"),
417
+ resolveAgentIdentity: vi.fn(),
418
+ resolveThinkingDefault: vi.fn(() => "high"),
419
+ resolveAgentTimeoutMs: vi.fn(() => 30_000),
420
+ ensureAgentWorkspace: vi.fn(async () => {}),
421
+ session: {
422
+ resolveStorePath: vi.fn(() => "/tmp/sessions.json"),
423
+ loadSessionStore: vi.fn(() => sessionStore),
424
+ saveSessionStore: vi.fn(async () => {}),
425
+ updateSessionStore: vi.fn(async (_storePath, mutator) => mutator(sessionStore as never)),
426
+ resolveSessionFilePath: vi.fn(() => "/tmp/session.json"),
427
+ },
428
+ runEmbeddedPiAgent,
429
+ };
430
+ mocks.managerGetCall.mockReturnValue({
431
+ callId: "call-1",
432
+ sessionKey: "voice:call:call-1",
433
+ direction: "inbound",
434
+ from: "+15550001234",
435
+ to: "+15550009999",
436
+ transcript: [],
437
+ });
438
+
439
+ await createVoiceCallRuntime({
440
+ config,
441
+ coreConfig: {} as CoreConfig,
442
+ agentRuntime: agentRuntime as never,
443
+ });
444
+
445
+ const handler = mocks.realtimeHandlerRegisterToolHandler.mock.calls[0]?.[1] as
446
+ | ((
447
+ args: unknown,
448
+ callId: string,
449
+ context?: { partialUserTranscript?: string },
450
+ ) => Promise<unknown>)
451
+ | undefined;
452
+ await expect(handler?.({ question: "What should I say?" }, "call-1")).resolves.toEqual({
453
+ text: "Per-call consult answer.",
454
+ });
455
+ expect(runEmbeddedPiAgent).toHaveBeenCalledWith(
456
+ expect.objectContaining({
457
+ sessionKey: "voice:call:call-1",
458
+ }),
459
+ );
460
+ });
461
+
385
462
  it("answers realtime consults from fast memory context before starting the full agent", async () => {
386
463
  const config = createBaseConfig();
387
464
  config.realtime.enabled = true;
@@ -408,6 +485,7 @@ describe("createVoiceCallRuntime lifecycle", () => {
408
485
  resolveStorePath: vi.fn(() => "/tmp/sessions.json"),
409
486
  loadSessionStore: vi.fn(() => sessionStore),
410
487
  saveSessionStore: vi.fn(async () => {}),
488
+ updateSessionStore: vi.fn(async (_storePath, mutator) => mutator(sessionStore as never)),
411
489
  resolveSessionFilePath: vi.fn(() => "/tmp/session.json"),
412
490
  },
413
491
  runEmbeddedPiAgent,
package/src/runtime.ts CHANGED
@@ -10,6 +10,8 @@ import {
10
10
  } from "openclaw/plugin-sdk/realtime-voice";
11
11
  import type { VoiceCallConfig } from "./config.js";
12
12
  import {
13
+ resolveVoiceCallEffectiveConfig,
14
+ resolveVoiceCallSessionKey,
13
15
  resolveTwilioAuthToken,
14
16
  resolveVoiceCallConfig,
15
17
  validateProviderConfig,
@@ -103,6 +105,7 @@ function loadRealtimeHandler(): Promise<RealtimeHandlerModule> {
103
105
  }
104
106
 
105
107
  function resolveVoiceCallConsultSessionKey(call: {
108
+ config: VoiceCallConfig;
106
109
  sessionKey?: string;
107
110
  from?: string;
108
111
  to?: string;
@@ -113,8 +116,11 @@ function resolveVoiceCallConsultSessionKey(call: {
113
116
  return call.sessionKey;
114
117
  }
115
118
  const phone = call.direction === "outbound" ? call.to : call.from;
116
- const normalizedPhone = phone?.replace(/\D/g, "");
117
- return normalizedPhone ? `voice:${normalizedPhone}` : `voice:${call.callId}`;
119
+ return resolveVoiceCallSessionKey({
120
+ config: call.config,
121
+ callId: call.callId,
122
+ phone,
123
+ });
118
124
  }
119
125
 
120
126
  function mapVoiceCallConsultTranscript(
@@ -334,13 +340,21 @@ export async function createVoiceCallRuntime(params: {
334
340
  if (!call) {
335
341
  return { error: `Call "${callId}" not found` };
336
342
  }
337
- const agentId = config.agentId ?? "main";
338
- const sessionKey = resolveVoiceCallConsultSessionKey(call);
343
+ const numberRouteKey =
344
+ typeof call.metadata?.numberRouteKey === "string"
345
+ ? call.metadata.numberRouteKey
346
+ : call.to;
347
+ const effectiveConfig = resolveVoiceCallEffectiveConfig(config, numberRouteKey).config;
348
+ const agentId = effectiveConfig.agentId ?? "main";
349
+ const sessionKey = resolveVoiceCallConsultSessionKey({
350
+ ...call,
351
+ config: effectiveConfig,
352
+ });
339
353
  const fastContext = await resolveRealtimeFastContextConsult({
340
354
  cfg,
341
355
  agentId,
342
356
  sessionKey,
343
- config: config.realtime.fastContext,
357
+ config: effectiveConfig.realtime.fastContext,
344
358
  args,
345
359
  logger: log,
346
360
  });
@@ -348,7 +362,7 @@ export async function createVoiceCallRuntime(params: {
348
362
  return fastContext.result;
349
363
  }
350
364
  const { provider: agentProvider, model } = resolveVoiceResponseModel({
351
- voiceConfig: config,
365
+ voiceConfig: effectiveConfig,
352
366
  agentRuntime,
353
367
  });
354
368
  const thinkLevel = agentRuntime.resolveThinkingDefault({
@@ -374,8 +388,10 @@ export async function createVoiceCallRuntime(params: {
374
388
  provider: agentProvider,
375
389
  model,
376
390
  thinkLevel,
377
- timeoutMs: config.responseTimeoutMs,
378
- toolsAllow: resolveRealtimeVoiceAgentConsultToolsAllow(config.realtime.toolPolicy),
391
+ timeoutMs: effectiveConfig.responseTimeoutMs,
392
+ toolsAllow: resolveRealtimeVoiceAgentConsultToolsAllow(
393
+ effectiveConfig.realtime.toolPolicy,
394
+ ),
379
395
  extraSystemPrompt: REALTIME_VOICE_CONSULT_SYSTEM_PROMPT,
380
396
  });
381
397
  },
@@ -11,6 +11,7 @@ export function createVoiceCallBaseConfig(params?: {
11
11
  fromNumber: "+15550001234",
12
12
  inboundPolicy: "disabled",
13
13
  allowFrom: [],
14
+ numbers: {},
14
15
  outbound: { defaultMode: "notify", notifyHangupDelaySec: 3 },
15
16
  maxDurationSeconds: 300,
16
17
  staleCallReaperSeconds: 600,
@@ -18,6 +19,7 @@ export function createVoiceCallBaseConfig(params?: {
18
19
  transcriptTimeoutMs: 180000,
19
20
  ringTimeoutMs: 30000,
20
21
  maxConcurrentCalls: 1,
22
+ sessionScope: "per-phone",
21
23
  serve: { port: 3334, bind: "127.0.0.1", path: "/voice/webhook" },
22
24
  tailscale: { mode: "off", path: "/voice/webhook" },
23
25
  tunnel: {
package/src/webhook.ts CHANGED
@@ -13,7 +13,11 @@ import {
13
13
  requestBodyErrorToText,
14
14
  } from "../api.js";
15
15
  import { isAllowlistedCaller, normalizePhoneNumber } from "./allowlist.js";
16
- import { normalizeVoiceCallConfig, type VoiceCallConfig } from "./config.js";
16
+ import {
17
+ normalizeVoiceCallConfig,
18
+ resolveVoiceCallEffectiveConfig,
19
+ type VoiceCallConfig,
20
+ } from "./config.js";
17
21
  import type { CoreAgentDeps, CoreConfig } from "./core-bridge.js";
18
22
  import { getHeader } from "./http-headers.js";
19
23
  import type { CallManager } from "./manager.js";
@@ -873,12 +877,16 @@ export class VoiceCallWebhookServer {
873
877
 
874
878
  try {
875
879
  const { generateVoiceResponse } = await loadResponseGeneratorModule();
880
+ const numberRouteKey =
881
+ typeof call.metadata?.numberRouteKey === "string" ? call.metadata.numberRouteKey : call.to;
882
+ const effectiveConfig = resolveVoiceCallEffectiveConfig(this.config, numberRouteKey).config;
876
883
 
877
884
  const result = await generateVoiceResponse({
878
- voiceConfig: this.config,
885
+ voiceConfig: effectiveConfig,
879
886
  coreConfig: this.coreConfig,
880
887
  agentRuntime: this.agentRuntime,
881
888
  callId,
889
+ sessionKey: call.sessionKey,
882
890
  from: call.from,
883
891
  transcript: call.transcript,
884
892
  userMessage,