@clawdbot/voice-call 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/types.ts ADDED
@@ -0,0 +1,272 @@
1
+ import { z } from "zod";
2
+
3
+ import type { CallMode } from "./config.js";
4
+
5
+ // -----------------------------------------------------------------------------
6
+ // Provider Identifiers
7
+ // -----------------------------------------------------------------------------
8
+
9
/** Identifiers accepted as a call provider name. */
const PROVIDER_NAMES = ["telnyx", "twilio", "mock"] as const;

/** Zod schema that validates a provider identifier. */
export const ProviderNameSchema = z.enum(PROVIDER_NAMES);

/** Union of the provider identifiers accepted by `ProviderNameSchema`. */
export type ProviderName = z.infer<typeof ProviderNameSchema>;
11
+
12
+ // -----------------------------------------------------------------------------
13
+ // Core Call Identifiers
14
+ // -----------------------------------------------------------------------------
15
+
16
/**
 * Identifier this package assigns to a call (a UUID string).
 * Plain `string` alias: it documents intent, it does not enforce a format.
 */
export type CallId = string;

/**
 * Identifier the telephony provider assigns to the same call.
 * Plain `string` alias; the format is provider-specific.
 */
export type ProviderCallId = string;
21
+
22
+ // -----------------------------------------------------------------------------
23
+ // Call Lifecycle States
24
+ // -----------------------------------------------------------------------------
25
+
26
/**
 * Reasons a call can end. Each value is also one of the terminal entries of
 * `CallStateSchema` below.
 */
export const EndReasonSchema = z.enum([
  "completed",
  "hangup-user",
  "hangup-bot",
  "timeout",
  "error",
  "failed",
  "no-answer",
  "busy",
  "voicemail",
]);
export type EndReason = z.infer<typeof EndReasonSchema>;

/** Every lifecycle state a call record can be in. */
export const CallStateSchema = z.enum([
  // Non-terminal states
  "initiated",
  "ringing",
  "answered",
  "active",
  "speaking",
  "listening",
  // Terminal states
  "completed",
  "hangup-user",
  "hangup-bot",
  "timeout",
  "error",
  "failed",
  "no-answer",
  "busy",
  "voicemail",
]);
export type CallState = z.infer<typeof CallStateSchema>;

/**
 * States after which a call is over. Built from the end-reason values so the
 * two lists cannot drift apart; the `Set<CallState>` annotation makes the
 * compiler reject an end reason that is not also a call state.
 */
export const TerminalStates = new Set<CallState>(EndReasonSchema.options);
71
+
72
+ // -----------------------------------------------------------------------------
73
+ // Normalized Call Events
74
+ // -----------------------------------------------------------------------------
75
+
76
/** Fields shared by every normalized call event. */
const BaseEventSchema = z.object({
  id: z.string(),
  callId: z.string(),
  providerCallId: z.string().optional(),
  timestamp: z.number(),
  // Optional fields for inbound call detection
  direction: z.enum(["inbound", "outbound"]).optional(),
  from: z.string().optional(),
  to: z.string().optional(),
});

// One schema per event variant; the `type` literal is the discriminator.

const CallInitiatedEventSchema = BaseEventSchema.extend({
  type: z.literal("call.initiated"),
});

const CallRingingEventSchema = BaseEventSchema.extend({
  type: z.literal("call.ringing"),
});

const CallAnsweredEventSchema = BaseEventSchema.extend({
  type: z.literal("call.answered"),
});

const CallActiveEventSchema = BaseEventSchema.extend({
  type: z.literal("call.active"),
});

/** Carries the `text` associated with a speaking event. */
const CallSpeakingEventSchema = BaseEventSchema.extend({
  type: z.literal("call.speaking"),
  text: z.string(),
});

/** Speech recognition result; `confidence`, when present, is within [0, 1]. */
const CallSpeechEventSchema = BaseEventSchema.extend({
  type: z.literal("call.speech"),
  transcript: z.string(),
  isFinal: z.boolean(),
  confidence: z.number().min(0).max(1).optional(),
});

const CallSilenceEventSchema = BaseEventSchema.extend({
  type: z.literal("call.silence"),
  durationMs: z.number(),
});

const CallDtmfEventSchema = BaseEventSchema.extend({
  type: z.literal("call.dtmf"),
  digits: z.string(),
});

const CallEndedEventSchema = BaseEventSchema.extend({
  type: z.literal("call.ended"),
  reason: EndReasonSchema,
});

const CallErrorEventSchema = BaseEventSchema.extend({
  type: z.literal("call.error"),
  error: z.string(),
  retryable: z.boolean().optional(),
});

/** Union of all normalized call events, discriminated on `type`. */
export const NormalizedEventSchema = z.discriminatedUnion("type", [
  CallInitiatedEventSchema,
  CallRingingEventSchema,
  CallAnsweredEventSchema,
  CallActiveEventSchema,
  CallSpeakingEventSchema,
  CallSpeechEventSchema,
  CallSilenceEventSchema,
  CallDtmfEventSchema,
  CallEndedEventSchema,
  CallErrorEventSchema,
]);
export type NormalizedEvent = z.infer<typeof NormalizedEventSchema>;
129
+
130
+ // -----------------------------------------------------------------------------
131
+ // Call Direction
132
+ // -----------------------------------------------------------------------------
133
+
134
/** Directions a call can have. */
const CALL_DIRECTIONS = ["outbound", "inbound"] as const;

/** Zod schema that validates a call direction. */
export const CallDirectionSchema = z.enum(CALL_DIRECTIONS);

/** Union of the directions accepted by `CallDirectionSchema`. */
export type CallDirection = z.infer<typeof CallDirectionSchema>;
136
+
137
+ // -----------------------------------------------------------------------------
138
+ // Call Record
139
+ // -----------------------------------------------------------------------------
140
+
141
/** One transcript line recorded for a call. */
export const TranscriptEntrySchema = z.object({
  // Numeric timestamp; the unit is not fixed by this schema.
  timestamp: z.number(),
  // Which side produced the text.
  speaker: z.enum(["bot", "user"]),
  text: z.string(),
  // Parsed as `true` when the field is omitted.
  isFinal: z.boolean().default(true),
});
export type TranscriptEntry = z.infer<typeof TranscriptEntrySchema>;

/** Record describing a single call. */
export const CallRecordSchema = z.object({
  // Identity
  callId: z.string(),
  providerCallId: z.string().optional(),
  provider: ProviderNameSchema,
  direction: CallDirectionSchema,
  state: CallStateSchema,

  // Endpoints
  from: z.string(),
  to: z.string(),
  sessionKey: z.string().optional(),

  // Timeline (numeric timestamps; only `startedAt` is required)
  startedAt: z.number(),
  answeredAt: z.number().optional(),
  endedAt: z.number().optional(),
  endReason: EndReasonSchema.optional(),

  // Collections parse to empty arrays when omitted
  transcript: z.array(TranscriptEntrySchema).default([]),
  processedEventIds: z.array(z.string()).default([]),

  // Free-form string-keyed data
  metadata: z.record(z.string(), z.unknown()).optional(),
});
export type CallRecord = z.infer<typeof CallRecordSchema>;
167
+
168
+ // -----------------------------------------------------------------------------
169
+ // Webhook Types
170
+ // -----------------------------------------------------------------------------
171
+
172
/** Outcome of verifying a webhook request. */
export type WebhookVerificationResult = {
  /** Whether the request was accepted. */
  ok: boolean;
  /** Optional human-readable explanation of the outcome. */
  reason?: string;
};

/** Request data made available to webhook handling code. */
export type WebhookContext = {
  /** Request headers; a header may be absent, single-valued, or multi-valued. */
  headers: Record<string, string | string[] | undefined>;
  /** Request body as an unparsed string. */
  rawBody: string;
  /** URL of the request. */
  url: string;
  /** HTTP method of the request. */
  method: "GET" | "POST" | "PUT" | "DELETE" | "PATCH";
  /** Query parameters, when provided. */
  query?: Record<string, string | string[] | undefined>;
};

/** What a provider produces after parsing a webhook request. */
export type ProviderWebhookParseResult = {
  /** Normalized events extracted from the request. */
  events: NormalizedEvent[];
  /** Optional body to send back to the provider. */
  providerResponseBody?: string;
  /** Optional headers to send back to the provider. */
  providerResponseHeaders?: Record<string, string>;
  /** Optional HTTP status code for the response. */
  statusCode?: number;
};
191
+
192
+ // -----------------------------------------------------------------------------
193
+ // Provider Method Types
194
+ // -----------------------------------------------------------------------------
195
+
196
/** Input for starting a call through a provider. */
export type InitiateCallInput = {
  callId: CallId;
  from: string;
  to: string;
  webhookUrl: string;
  clientState?: Record<string, string>;
  /** Inline TwiML to execute (skips webhook, used for notify mode) */
  inlineTwiml?: string;
};

/** What a provider reports after a call was started. */
export type InitiateCallResult = {
  providerCallId: ProviderCallId;
  status: "initiated" | "queued";
};

/** Pair of identifiers that every per-call provider operation below carries. */
type ProviderCallRef = {
  callId: CallId;
  providerCallId: ProviderCallId;
};

/** Input for ending a call, including why it is being ended. */
export type HangupCallInput = ProviderCallRef & {
  reason: EndReason;
};

/** Input for speaking text on a call; `voice` and `locale` are optional. */
export type PlayTtsInput = ProviderCallRef & {
  text: string;
  voice?: string;
  locale?: string;
};

/** Input for starting to listen on a call; `language` is optional. */
export type StartListeningInput = ProviderCallRef & {
  language?: string;
};

/** Input for stopping listening on a call. */
export type StopListeningInput = ProviderCallRef;
235
+
236
+ // -----------------------------------------------------------------------------
237
+ // Outbound Call Options
238
+ // -----------------------------------------------------------------------------
239
+
240
/** Per-call options for an outbound call; every field is optional. */
export type OutboundCallOptions = {
  /** Message to speak when call connects */
  message?: string;
  /** Call mode (overrides config default) */
  mode?: CallMode;
};
246
+
247
+ // -----------------------------------------------------------------------------
248
+ // Tool Result Types
249
+ // -----------------------------------------------------------------------------
250
+
251
/** Fields every tool result carries: a success flag and an optional error text. */
type ToolOutcome = {
  success: boolean;
  error?: string;
};

/** Result of the tool that starts a call. */
export type InitiateCallToolResult = ToolOutcome & {
  callId?: string;
  status?: "initiated" | "queued" | "no-answer" | "busy" | "failed";
};

/** Result of the tool that continues a call; may carry a transcript. */
export type ContinueCallToolResult = ToolOutcome & {
  transcript?: string;
};

/** Result of the tool that speaks to the user. */
export type SpeakToUserToolResult = ToolOutcome;

/** Result of the tool that ends a call. */
export type EndCallToolResult = ToolOutcome;
package/src/utils.ts ADDED
@@ -0,0 +1,12 @@
1
+ import os from "node:os";
2
+ import path from "node:path";
3
+
4
/**
 * Resolve a user-supplied path to an absolute path.
 *
 * - Surrounding whitespace is trimmed; an empty result is returned as-is.
 * - A leading `~` that is the whole input or is followed by `/` or `\` is
 *   replaced with the current user's home directory. Other `~` forms such as
 *   `~name/...` are left untouched and resolved like any relative path.
 * - Everything else is resolved against the current working directory.
 *
 * @param input - Path as typed by the user.
 * @returns The absolute path, or `""` when the input is blank.
 */
export function resolveUserPath(input: string): string {
  const trimmed = input.trim();
  if (!trimmed) return trimmed;

  // A replacer function is used instead of a replacement string so that `$`
  // sequences inside the home directory (`$$`, `$&`, ...) are inserted
  // literally rather than interpreted as replacement patterns.
  const expanded = trimmed.replace(/^~(?=$|[\\/])/, () => os.homedir());
  return path.resolve(expanded);
}
@@ -0,0 +1,65 @@
1
+ /**
2
+ * Voice mapping and XML utilities for voice call providers.
3
+ */
4
+
5
/**
 * Replace the five XML special characters (`&`, `<`, `>`, `"`, `'`) with
 * their entity references so the text can be embedded in TwiML or other XML.
 *
 * @param text - Raw text to embed.
 * @returns The text with every special character escaped.
 */
export function escapeXml(text: string): string {
  // Single pass over the input: each special character is mapped exactly once,
  // so ampersands introduced by the escaping are never escaped again.
  return text.replace(/[&<>"']/g, (ch) => {
    switch (ch) {
      case "&":
        return "&amp;";
      case "<":
        return "&lt;";
      case ">":
        return "&gt;";
      case '"':
        return "&quot;";
      default:
        // Only the apostrophe remains in the character class.
        return "&apos;";
    }
  });
}
16
+
17
/**
 * Map of OpenAI voice names to similar Twilio Polly voices.
 */
const OPENAI_TO_POLLY_MAP: Record<string, string> = {
  alloy: "Polly.Joanna", // neutral, warm
  echo: "Polly.Matthew", // male, warm
  fable: "Polly.Amy", // British, expressive
  onyx: "Polly.Brian", // deep male
  nova: "Polly.Salli", // female, friendly
  shimmer: "Polly.Kimberly", // female, clear
};

/**
 * Default Polly voice when no mapping is found.
 */
export const DEFAULT_POLLY_VOICE = "Polly.Joanna";

/**
 * Look up the Polly voice for an OpenAI voice name, case-insensitively.
 *
 * Only the map's own keys are consulted. A plain `obj[key]` / `key in obj`
 * would also match inherited members such as `constructor` or `__proto__`,
 * which are not voices.
 */
function lookupPollyVoice(voice: string): string | undefined {
  const key = voice.toLowerCase();
  return Object.prototype.hasOwnProperty.call(OPENAI_TO_POLLY_MAP, key)
    ? OPENAI_TO_POLLY_MAP[key]
    : undefined;
}

/**
 * Map OpenAI voice names to Twilio Polly equivalents.
 * Names that already start with `Polly.` or `Google.` are returned unchanged.
 *
 * @param voice - OpenAI voice name (alloy, echo, etc.) or Polly voice name
 * @returns Polly voice name suitable for Twilio TwiML; `DEFAULT_POLLY_VOICE`
 *   when `voice` is missing, empty, or not a known OpenAI voice
 */
export function mapVoiceToPolly(voice: string | undefined): string {
  if (!voice) return DEFAULT_POLLY_VOICE;

  // Already a Polly/Google voice - pass through
  if (voice.startsWith("Polly.") || voice.startsWith("Google.")) {
    return voice;
  }

  // Map OpenAI voices to Polly equivalents
  return lookupPollyVoice(voice) ?? DEFAULT_POLLY_VOICE;
}

/**
 * Check if a voice name is a known OpenAI voice (case-insensitive).
 */
export function isOpenAiVoice(voice: string): boolean {
  return lookupPollyVoice(voice) !== undefined;
}

/**
 * Get all supported OpenAI voice names.
 */
export function getOpenAiVoiceNames(): string[] {
  return Object.keys(OPENAI_TO_POLLY_MAP);
}
@@ -0,0 +1,197 @@
1
+ import crypto from "node:crypto";
2
+
3
+ import type { WebhookContext } from "./types.js";
4
+
5
/**
 * Validate Twilio webhook signature using HMAC-SHA1.
 *
 * The string to sign is the URL followed by every POST parameter as
 * `key + value`, with parameters ordered by key. The HMAC-SHA1 of that string
 * keyed with the auth token, base64-encoded, must equal the signature.
 *
 * @param authToken - Twilio auth token used as the HMAC key.
 * @param signature - Value of the `X-Twilio-Signature` header, if any.
 * @param url - Full URL Twilio requested.
 * @param params - POST parameters of the request.
 * @returns `true` only when a signature is present and matches.
 *
 * @see https://www.twilio.com/docs/usage/webhooks/webhooks-security
 */
export function validateTwilioSignature(
  authToken: string,
  signature: string | undefined,
  url: string,
  params: URLSearchParams,
): boolean {
  if (!signature) {
    return false;
  }

  // Twilio orders parameters with a case-sensitive sort, so compare keys by
  // code unit. `localeCompare` is locale-dependent and would place "a" before
  // "B", producing a different string than the one Twilio signed.
  const sortedParams = Array.from(params.entries()).sort(([keyA], [keyB]) => {
    if (keyA < keyB) return -1;
    if (keyA > keyB) return 1;
    return 0;
  });

  // Build the string to sign: URL + sorted params (key+value pairs)
  let dataToSign = url;
  for (const [key, value] of sortedParams) {
    dataToSign += key + value;
  }

  // HMAC-SHA1 with auth token, then base64 encode
  const expectedSignature = crypto
    .createHmac("sha1", authToken)
    .update(dataToSign)
    .digest("base64");

  // Use timing-safe comparison to prevent timing attacks
  return timingSafeEqual(signature, expectedSignature);
}

/**
 * Timing-safe string comparison to prevent timing attacks.
 *
 * Lengths are compared on the encoded bytes, not on the strings:
 * `crypto.timingSafeEqual` throws when its two buffers differ in byte length,
 * which can happen for equal-length strings containing non-ASCII characters.
 */
function timingSafeEqual(a: string, b: string): boolean {
  const bufA = Buffer.from(a);
  const bufB = Buffer.from(b);

  if (bufA.length !== bufB.length) {
    // Still run a comparison so a length mismatch is not a cheap early exit.
    crypto.timingSafeEqual(bufA, bufA);
    return false;
  }

  return crypto.timingSafeEqual(bufA, bufB);
}
60
+
61
/**
 * Reconstruct the public webhook URL from request headers.
 *
 * When behind a reverse proxy (Tailscale, nginx, ngrok), the original URL
 * used by Twilio differs from the local request URL. We use standard
 * forwarding headers to reconstruct it.
 *
 * Host priority order:
 * 1. X-Forwarded-Host (standard proxy header)
 * 2. X-Original-Host (nginx)
 * 3. Ngrok-Forwarded-Host (ngrok specific)
 * 4. Host header (direct connection)
 *
 * The scheme comes from X-Forwarded-Proto and defaults to `https`. When a
 * forwarding header holds a comma-separated list, its first entry is used.
 * Any port on the chosen host is dropped.
 *
 * @param ctx - Webhook request; `ctx.url` may be absolute or path-only.
 * @returns `proto://host/path?query` as reconstructed from the request.
 */
export function reconstructWebhookUrl(ctx: WebhookContext): string {
  const { headers } = ctx;

  const proto =
    firstListValue(getHeader(headers, "x-forwarded-proto")) || "https";

  const forwardedHost =
    firstListValue(getHeader(headers, "x-forwarded-host")) ||
    firstListValue(getHeader(headers, "x-original-host")) ||
    firstListValue(getHeader(headers, "ngrok-forwarded-host")) ||
    firstListValue(getHeader(headers, "host")) ||
    "";

  // Extract path from the context URL (fallback to "/" on parse failure).
  // The base only matters when `ctx.url` is path-only (e.g. "/hook?a=1");
  // an absolute `ctx.url` ignores it.
  let path = "/";
  try {
    const parsed = new URL(ctx.url, "http://localhost");
    path = parsed.pathname + parsed.search;
  } catch {
    // URL parsing failed
  }

  // Remove port from host (ngrok URLs don't have ports)
  const host = stripPort(forwardedHost);

  return `${proto}://${host}${path}`;
}

/** Return the first entry of a comma-separated header value, trimmed. */
function firstListValue(value: string | undefined): string | undefined {
  return value?.split(",")[0]?.trim();
}

/** Drop a trailing `:port`, keeping bracketed IPv6 literals intact. */
function stripPort(host: string): string {
  if (host.startsWith("[")) {
    const close = host.indexOf("]");
    if (close !== -1) {
      return host.slice(0, close + 1);
    }
  }
  return host.split(":")[0] || host;
}

/**
 * Get a header value, handling both string and string[] types.
 * The lookup key is lower-cased; for a multi-valued header the first value
 * is returned.
 */
function getHeader(
  headers: Record<string, string | string[] | undefined>,
  name: string,
): string | undefined {
  const value = headers[name.toLowerCase()];
  return Array.isArray(value) ? value[0] : value;
}
114
+
115
/**
 * Detailed outcome of verifying a Twilio webhook request.
 */
export interface TwilioVerificationResult {
  /** Whether the request was accepted. */
  ok: boolean;
  /** Optional explanation of why the request was accepted or rejected. */
  reason?: string;
  /** The URL that was used for verification (for debugging) */
  verificationUrl?: string;
  /** Whether we're running behind ngrok free tier */
  isNgrokFreeTier?: boolean;
}
126
+
127
/**
 * Verify Twilio webhook with full context and detailed result.
 *
 * Steps:
 * 1. `options.skipVerification` short-circuits to success.
 * 2. A missing `X-Twilio-Signature` header is rejected.
 * 3. The signature is checked against `options.publicUrl` when given,
 *    otherwise against the URL reconstructed from the request headers, using
 *    the raw body parsed as URL-encoded parameters.
 * 4. If the check fails, the request is still accepted only when
 *    `options.allowNgrokFreeTier` is set AND the verification URL's host is an
 *    ngrok domain (less secure; intended for ngrok free tier setups).
 *
 * @param ctx - Incoming webhook request.
 * @param authToken - Twilio auth token used to check the signature.
 * @param options - Optional overrides, see the field comments.
 * @returns The verification outcome with the URL that was checked.
 */
export function verifyTwilioWebhook(
  ctx: WebhookContext,
  authToken: string,
  options?: {
    /** Override the public URL (e.g., from config) */
    publicUrl?: string;
    /** Allow ngrok free tier compatibility mode (less secure) */
    allowNgrokFreeTier?: boolean;
    /** Skip verification entirely (only for development) */
    skipVerification?: boolean;
  },
): TwilioVerificationResult {
  // Allow skipping verification for development/testing
  if (options?.skipVerification) {
    return { ok: true, reason: "verification skipped (dev mode)" };
  }

  const signature = getHeader(ctx.headers, "x-twilio-signature");
  if (!signature) {
    return { ok: false, reason: "Missing X-Twilio-Signature header" };
  }

  // Reconstruct the URL Twilio used
  const verificationUrl = options?.publicUrl || reconstructWebhookUrl(ctx);

  // Parse the body as URL-encoded params
  const params = new URLSearchParams(ctx.rawBody);

  if (validateTwilioSignature(authToken, signature, verificationUrl, params)) {
    return { ok: true, verificationUrl };
  }

  // Decide on the host only: the path and query of the URL come straight from
  // the request, so a substring match on the whole URL could be triggered by
  // a request to "/x?.ngrok.io".
  const isNgrokFreeTier = isNgrokFreeTierUrl(verificationUrl);

  if (isNgrokFreeTier && options?.allowNgrokFreeTier) {
    console.warn(
      "[voice-call] Twilio signature validation failed (proceeding for ngrok free tier compatibility)",
    );
    return {
      ok: true,
      reason: "ngrok free tier compatibility mode",
      verificationUrl,
      isNgrokFreeTier: true,
    };
  }

  return {
    ok: false,
    reason: `Invalid signature for URL: ${verificationUrl}`,
    verificationUrl,
    isNgrokFreeTier,
  };
}

/**
 * Whether the URL's hostname is a subdomain of `ngrok-free.app` or `ngrok.io`.
 * Returns `false` for anything that does not parse as an absolute URL.
 */
function isNgrokFreeTierUrl(url: string): boolean {
  let hostname: string;
  try {
    hostname = new URL(url).hostname.toLowerCase();
  } catch {
    return false;
  }
  return hostname.endsWith(".ngrok-free.app") || hostname.endsWith(".ngrok.io");
}