@openclaw/voice-call 2026.3.13 → 2026.5.2-beta.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. package/README.md +27 -5
  2. package/api.ts +16 -0
  3. package/cli-metadata.ts +10 -0
  4. package/config-api.ts +12 -0
  5. package/index.test.ts +943 -0
  6. package/index.ts +379 -149
  7. package/openclaw.plugin.json +384 -157
  8. package/package.json +35 -5
  9. package/runtime-api.ts +20 -0
  10. package/runtime-entry.ts +1 -0
  11. package/setup-api.ts +47 -0
  12. package/src/allowlist.test.ts +18 -0
  13. package/src/cli.ts +533 -68
  14. package/src/config-compat.test.ts +120 -0
  15. package/src/config-compat.ts +227 -0
  16. package/src/config.test.ts +273 -12
  17. package/src/config.ts +355 -72
  18. package/src/core-bridge.ts +2 -147
  19. package/src/deep-merge.test.ts +40 -0
  20. package/src/gateway-continue-operation.ts +200 -0
  21. package/src/http-headers.ts +6 -3
  22. package/src/manager/context.ts +6 -5
  23. package/src/manager/events.test.ts +243 -19
  24. package/src/manager/events.ts +61 -31
  25. package/src/manager/lifecycle.ts +53 -0
  26. package/src/manager/lookup.test.ts +52 -0
  27. package/src/manager/outbound.test.ts +528 -0
  28. package/src/manager/outbound.ts +163 -57
  29. package/src/manager/store.ts +18 -6
  30. package/src/manager/timers.test.ts +129 -0
  31. package/src/manager/timers.ts +4 -3
  32. package/src/manager/twiml.test.ts +13 -0
  33. package/src/manager/twiml.ts +8 -0
  34. package/src/manager.closed-loop.test.ts +30 -12
  35. package/src/manager.inbound-allowlist.test.ts +77 -10
  36. package/src/manager.notify.test.ts +344 -20
  37. package/src/manager.restore.test.ts +95 -8
  38. package/src/manager.test-harness.ts +8 -6
  39. package/src/manager.ts +79 -5
  40. package/src/media-stream.test.ts +578 -81
  41. package/src/media-stream.ts +235 -54
  42. package/src/providers/base.ts +19 -0
  43. package/src/providers/mock.ts +7 -1
  44. package/src/providers/plivo.test.ts +50 -6
  45. package/src/providers/plivo.ts +14 -6
  46. package/src/providers/shared/call-status.ts +2 -1
  47. package/src/providers/shared/guarded-json-api.test.ts +106 -0
  48. package/src/providers/shared/guarded-json-api.ts +1 -1
  49. package/src/providers/telnyx.test.ts +178 -6
  50. package/src/providers/telnyx.ts +40 -3
  51. package/src/providers/twilio/api.test.ts +145 -0
  52. package/src/providers/twilio/api.ts +67 -16
  53. package/src/providers/twilio/twiml-policy.ts +6 -10
  54. package/src/providers/twilio/webhook.ts +1 -1
  55. package/src/providers/twilio.test.ts +425 -25
  56. package/src/providers/twilio.ts +230 -77
  57. package/src/providers/twilio.types.ts +17 -0
  58. package/src/realtime-defaults.ts +3 -0
  59. package/src/realtime-fast-context.test.ts +88 -0
  60. package/src/realtime-fast-context.ts +165 -0
  61. package/src/realtime-transcription.runtime.ts +4 -0
  62. package/src/realtime-voice.runtime.ts +5 -0
  63. package/src/response-generator.test.ts +321 -0
  64. package/src/response-generator.ts +213 -53
  65. package/src/response-model.test.ts +71 -0
  66. package/src/response-model.ts +23 -0
  67. package/src/runtime.test.ts +429 -0
  68. package/src/runtime.ts +270 -24
  69. package/src/telephony-audio.test.ts +61 -0
  70. package/src/telephony-audio.ts +1 -79
  71. package/src/telephony-tts.test.ts +133 -12
  72. package/src/telephony-tts.ts +155 -2
  73. package/src/test-fixtures.ts +28 -7
  74. package/src/tts-provider-voice.test.ts +34 -0
  75. package/src/tts-provider-voice.ts +21 -0
  76. package/src/tunnel.test.ts +166 -0
  77. package/src/tunnel.ts +1 -1
  78. package/src/types.ts +24 -37
  79. package/src/utils.test.ts +17 -0
  80. package/src/voice-mapping.test.ts +34 -0
  81. package/src/voice-mapping.ts +3 -2
  82. package/src/webhook/realtime-handler.test.ts +598 -0
  83. package/src/webhook/realtime-handler.ts +485 -0
  84. package/src/webhook/stale-call-reaper.test.ts +88 -0
  85. package/src/webhook/stale-call-reaper.ts +5 -0
  86. package/src/webhook/tailscale.test.ts +214 -0
  87. package/src/webhook/tailscale.ts +19 -5
  88. package/src/webhook-exposure.test.ts +33 -0
  89. package/src/webhook-exposure.ts +84 -0
  90. package/src/webhook-security.test.ts +172 -21
  91. package/src/webhook-security.ts +43 -29
  92. package/src/webhook.hangup-once.lifecycle.test.ts +135 -0
  93. package/src/webhook.test.ts +1145 -27
  94. package/src/webhook.ts +523 -102
  95. package/src/webhook.types.ts +5 -0
  96. package/src/websocket-test-support.ts +72 -0
  97. package/tsconfig.json +16 -0
  98. package/CHANGELOG.md +0 -121
  99. package/src/providers/index.ts +0 -10
  100. package/src/providers/stt-openai-realtime.test.ts +0 -42
  101. package/src/providers/stt-openai-realtime.ts +0 -311
  102. package/src/providers/tts-openai.test.ts +0 -43
  103. package/src/providers/tts-openai.ts +0 -221
@@ -1,221 +0,0 @@
1
- import { resolveOpenAITtsInstructions } from "openclaw/plugin-sdk/voice-call";
2
- import { pcmToMulaw } from "../telephony-audio.js";
3
-
4
- /**
5
- * OpenAI TTS Provider
6
- *
7
- * Generates speech audio using OpenAI's text-to-speech API.
8
- * Handles audio format conversion for telephony (mu-law 8kHz).
9
- *
10
- * Best practices from OpenAI docs:
11
- * - Use gpt-4o-mini-tts for intelligent realtime applications (supports instructions)
12
- * - Use tts-1 for lower latency, tts-1-hd for higher quality
13
- * - Use marin or cedar voices for best quality
14
- * - Use pcm or wav format for fastest response times
15
- *
16
- * @see https://platform.openai.com/docs/guides/text-to-speech
17
- */
18
-
19
- /**
20
- * OpenAI TTS configuration.
21
- */
22
- export interface OpenAITTSConfig {
23
- /** OpenAI API key (uses OPENAI_API_KEY env if not set) */
24
- apiKey?: string;
25
- /**
26
- * TTS model:
27
- * - gpt-4o-mini-tts: newest, supports instructions for tone/style control (recommended)
28
- * - tts-1: lower latency
29
- * - tts-1-hd: higher quality
30
- */
31
- model?: string;
32
- /**
33
- * Voice to use. For best quality, use marin or cedar.
34
- * All 13 voices: alloy, ash, ballad, coral, echo, fable, nova, onyx, sage, shimmer, verse, marin, cedar
35
- * Note: tts-1/tts-1-hd only support: alloy, ash, coral, echo, fable, onyx, nova, sage, shimmer
36
- */
37
- voice?: string;
38
- /** Speed multiplier (0.25 to 4.0) */
39
- speed?: number;
40
- /**
41
- * Instructions for speech style (only works with gpt-4o-mini-tts model).
42
- * Examples: "Speak in a cheerful tone", "Talk like a sympathetic customer service agent"
43
- */
44
- instructions?: string;
45
- }
46
-
47
- /**
48
- * Supported OpenAI TTS voices (all 13 built-in voices).
49
- * For best quality, use marin or cedar.
50
- * Note: tts-1 and tts-1-hd support a smaller set.
51
- */
52
- export const OPENAI_TTS_VOICES = [
53
- "alloy",
54
- "ash",
55
- "ballad",
56
- "coral",
57
- "echo",
58
- "fable",
59
- "nova",
60
- "onyx",
61
- "sage",
62
- "shimmer",
63
- "verse",
64
- "marin",
65
- "cedar",
66
- ] as const;
67
-
68
- export type OpenAITTSVoice = (typeof OPENAI_TTS_VOICES)[number];
69
-
70
- function trimToUndefined(value: string | undefined): string | undefined {
71
- const trimmed = value?.trim();
72
- return trimmed ? trimmed : undefined;
73
- }
74
-
75
- /**
76
- * OpenAI TTS Provider for generating speech audio.
77
- */
78
- export class OpenAITTSProvider {
79
- private apiKey: string;
80
- private model: string;
81
- private voice: OpenAITTSVoice;
82
- private speed: number;
83
- private instructions?: string;
84
-
85
- constructor(config: OpenAITTSConfig = {}) {
86
- this.apiKey =
87
- trimToUndefined(config.apiKey) ?? trimToUndefined(process.env.OPENAI_API_KEY) ?? "";
88
- // Default to gpt-4o-mini-tts for intelligent realtime applications
89
- this.model = trimToUndefined(config.model) ?? "gpt-4o-mini-tts";
90
- // Default to coral - good balance of quality and natural tone
91
- this.voice = (trimToUndefined(config.voice) as OpenAITTSVoice | undefined) ?? "coral";
92
- this.speed = config.speed ?? 1.0;
93
- this.instructions = trimToUndefined(config.instructions);
94
-
95
- if (!this.apiKey) {
96
- throw new Error("OpenAI API key required (set OPENAI_API_KEY or pass apiKey)");
97
- }
98
- }
99
-
100
- /**
101
- * Generate speech audio from text.
102
- * Returns raw PCM audio data (24kHz, mono, 16-bit).
103
- */
104
- async synthesize(text: string, instructions?: string): Promise<Buffer> {
105
- // Build request body
106
- const body: Record<string, unknown> = {
107
- model: this.model,
108
- input: text,
109
- voice: this.voice,
110
- response_format: "pcm", // Raw PCM audio (24kHz, mono, 16-bit signed LE)
111
- speed: this.speed,
112
- };
113
-
114
- const effectiveInstructions = resolveOpenAITtsInstructions(
115
- this.model,
116
- trimToUndefined(instructions) ?? this.instructions,
117
- );
118
- if (effectiveInstructions) {
119
- body.instructions = effectiveInstructions;
120
- }
121
-
122
- const response = await fetch("https://api.openai.com/v1/audio/speech", {
123
- method: "POST",
124
- headers: {
125
- Authorization: `Bearer ${this.apiKey}`,
126
- "Content-Type": "application/json",
127
- },
128
- body: JSON.stringify(body),
129
- });
130
-
131
- if (!response.ok) {
132
- const error = await response.text();
133
- throw new Error(`OpenAI TTS failed: ${response.status} - ${error}`);
134
- }
135
-
136
- const arrayBuffer = await response.arrayBuffer();
137
- return Buffer.from(arrayBuffer);
138
- }
139
-
140
- /**
141
- * Generate speech and convert to mu-law format for Twilio.
142
- * Twilio Media Streams expect 8kHz mono mu-law audio.
143
- */
144
- async synthesizeForTwilio(text: string): Promise<Buffer> {
145
- // Get raw PCM from OpenAI (24kHz, 16-bit signed LE, mono)
146
- const pcm24k = await this.synthesize(text);
147
-
148
- // Resample from 24kHz to 8kHz
149
- const pcm8k = resample24kTo8k(pcm24k);
150
-
151
- // Encode to mu-law
152
- return pcmToMulaw(pcm8k);
153
- }
154
- }
155
-
156
- /**
157
- * Resample 24kHz PCM to 8kHz using linear interpolation.
158
- * Input/output: 16-bit signed little-endian mono.
159
- */
160
- function resample24kTo8k(input: Buffer): Buffer {
161
- const inputSamples = input.length / 2;
162
- const outputSamples = Math.floor(inputSamples / 3);
163
- const output = Buffer.alloc(outputSamples * 2);
164
-
165
- for (let i = 0; i < outputSamples; i++) {
166
- // Calculate position in input (3:1 ratio)
167
- const srcPos = i * 3;
168
- const srcIdx = srcPos * 2;
169
-
170
- if (srcIdx + 3 < input.length) {
171
- // Linear interpolation between samples
172
- const s0 = input.readInt16LE(srcIdx);
173
- const s1 = input.readInt16LE(srcIdx + 2);
174
- const frac = srcPos % 1 || 0;
175
- const sample = Math.round(s0 + frac * (s1 - s0));
176
- output.writeInt16LE(clamp16(sample), i * 2);
177
- } else {
178
- // Last sample
179
- output.writeInt16LE(input.readInt16LE(srcIdx), i * 2);
180
- }
181
- }
182
-
183
- return output;
184
- }
185
-
186
- /**
187
- * Clamp value to 16-bit signed integer range.
188
- */
189
- function clamp16(value: number): number {
190
- return Math.max(-32768, Math.min(32767, value));
191
- }
192
-
193
- /**
194
- * Convert 8-bit mu-law to 16-bit linear PCM.
195
- * Useful for decoding incoming audio.
196
- */
197
- export function mulawToLinear(mulaw: number): number {
198
- // mu-law is transmitted inverted
199
- mulaw = ~mulaw & 0xff;
200
-
201
- const sign = mulaw & 0x80;
202
- const exponent = (mulaw >> 4) & 0x07;
203
- const mantissa = mulaw & 0x0f;
204
-
205
- let sample = ((mantissa << 3) + 132) << exponent;
206
- sample -= 132;
207
-
208
- return sign ? -sample : sample;
209
- }
210
-
211
- /**
212
- * Chunk audio buffer into 20ms frames for streaming.
213
- * At 8kHz mono, 20ms = 160 samples = 160 bytes (mu-law).
214
- */
215
- export function chunkAudio(audio: Buffer, chunkSize = 160): Generator<Buffer, void, unknown> {
216
- return (function* () {
217
- for (let i = 0; i < audio.length; i += chunkSize) {
218
- yield audio.subarray(i, Math.min(i + chunkSize, audio.length));
219
- }
220
- })();
221
- }