@clawdbot/voice-call 0.1.0 → 2026.1.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +15 -0
- package/README.md +9 -3
- package/index.ts +1 -1
- package/package.json +4 -2
- package/src/cli.ts +4 -1
- package/src/config.ts +26 -2
- package/src/manager/context.ts +22 -0
- package/src/manager/events.ts +178 -0
- package/src/manager/lookup.ts +34 -0
- package/src/manager/outbound.ts +247 -0
- package/src/manager/state.ts +51 -0
- package/src/manager/store.ts +89 -0
- package/src/manager/timers.ts +87 -0
- package/src/manager/twiml.ts +10 -0
- package/src/manager.test.ts +108 -0
- package/src/manager.ts +29 -1
- package/src/providers/index.ts +1 -0
- package/src/providers/plivo.test.ts +29 -0
- package/src/providers/plivo.ts +504 -0
- package/src/providers/twilio/api.ts +29 -0
- package/src/providers/twilio/webhook.ts +30 -0
- package/src/providers/twilio.ts +14 -54
- package/src/runtime.ts +13 -0
- package/src/types.ts +1 -1
- package/src/webhook-security.test.ts +156 -0
- package/src/webhook-security.ts +242 -0
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,20 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## 2026.1.16
|
|
4
|
+
|
|
5
|
+
### Changes
|
|
6
|
+
- Version alignment with core Clawdbot release numbers.
|
|
7
|
+
|
|
8
|
+
## 2026.1.15
|
|
9
|
+
|
|
10
|
+
### Changes
|
|
11
|
+
- Version alignment with core Clawdbot release numbers.
|
|
12
|
+
|
|
13
|
+
## 2026.1.14
|
|
14
|
+
|
|
15
|
+
### Changes
|
|
16
|
+
- Version alignment with core Clawdbot release numbers.
|
|
17
|
+
|
|
3
18
|
## 0.1.0
|
|
4
19
|
|
|
5
20
|
### Highlights
|
package/README.md
CHANGED
|
@@ -5,6 +5,7 @@ Official Voice Call plugin for **Clawdbot**.
|
|
|
5
5
|
Providers:
|
|
6
6
|
- **Twilio** (Programmable Voice + Media Streams)
|
|
7
7
|
- **Telnyx** (Call Control v2)
|
|
8
|
+
- **Plivo** (Voice API + XML transfer + GetInput speech)
|
|
8
9
|
- **Mock** (dev/no network)
|
|
9
10
|
|
|
10
11
|
Docs: `https://docs.clawd.bot/plugins/voice-call`
|
|
@@ -34,7 +35,7 @@ Put under `plugins.entries.voice-call.config`:
|
|
|
34
35
|
|
|
35
36
|
```json5
|
|
36
37
|
{
|
|
37
|
-
provider: "twilio", // or "telnyx" | "mock"
|
|
38
|
+
provider: "twilio", // or "telnyx" | "plivo" | "mock"
|
|
38
39
|
fromNumber: "+15550001234",
|
|
39
40
|
toNumber: "+15550005678",
|
|
40
41
|
|
|
@@ -43,6 +44,11 @@ Put under `plugins.entries.voice-call.config`:
|
|
|
43
44
|
authToken: "your_token"
|
|
44
45
|
},
|
|
45
46
|
|
|
47
|
+
plivo: {
|
|
48
|
+
authId: "MAxxxxxxxxxxxxxxxxxxxx",
|
|
49
|
+
authToken: "your_token"
|
|
50
|
+
},
|
|
51
|
+
|
|
46
52
|
// Webhook server
|
|
47
53
|
serve: {
|
|
48
54
|
port: 3334,
|
|
@@ -66,7 +72,7 @@ Put under `plugins.entries.voice-call.config`:
|
|
|
66
72
|
```
|
|
67
73
|
|
|
68
74
|
Notes:
|
|
69
|
-
- Twilio/Telnyx require a **publicly reachable** webhook URL.
|
|
75
|
+
- Twilio/Telnyx/Plivo require a **publicly reachable** webhook URL.
|
|
70
76
|
- `mock` is a local dev provider (no network calls).
|
|
71
77
|
|
|
72
78
|
## CLI
|
|
@@ -102,6 +108,6 @@ Actions:
|
|
|
102
108
|
|
|
103
109
|
## Notes
|
|
104
110
|
|
|
105
|
-
- Uses webhook signature verification for Twilio/Telnyx.
|
|
111
|
+
- Uses webhook signature verification for Twilio/Telnyx/Plivo.
|
|
106
112
|
- `responseModel` / `responseSystemPrompt` control AI auto-responses.
|
|
107
113
|
- Media streaming requires `ws` and OpenAI Realtime API key.
|
package/index.ts
CHANGED
|
@@ -125,7 +125,7 @@ const VoiceCallToolSchema = Type.Union([
|
|
|
125
125
|
const voiceCallPlugin = {
|
|
126
126
|
id: "voice-call",
|
|
127
127
|
name: "Voice Call",
|
|
128
|
-
description: "Voice-call plugin with Telnyx/Twilio providers",
|
|
128
|
+
description: "Voice-call plugin with Telnyx/Twilio/Plivo providers",
|
|
129
129
|
configSchema: voiceCallConfigSchema,
|
|
130
130
|
register(api) {
|
|
131
131
|
const cfg = voiceCallConfigSchema.parse(api.pluginConfig);
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@clawdbot/voice-call",
|
|
3
|
-
"version": "0.1.0",
|
|
3
|
+
"version": "2026.1.16",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Clawdbot voice-call plugin",
|
|
6
6
|
"dependencies": {
|
|
@@ -9,6 +9,8 @@
|
|
|
9
9
|
"zod": "^4.3.5"
|
|
10
10
|
},
|
|
11
11
|
"clawdbot": {
|
|
12
|
-
"extensions": [
|
|
12
|
+
"extensions": [
|
|
13
|
+
"./index.ts"
|
|
14
|
+
]
|
|
13
15
|
}
|
|
14
16
|
}
|
package/src/cli.ts
CHANGED
|
@@ -42,7 +42,10 @@ export function registerVoiceCallCli(params: {
|
|
|
42
42
|
logger: Logger;
|
|
43
43
|
}) {
|
|
44
44
|
const { program, config, ensureRuntime, logger } = params;
|
|
45
|
-
const root = program
|
|
45
|
+
const root = program
|
|
46
|
+
.command("voicecall")
|
|
47
|
+
.description("Voice call utilities")
|
|
48
|
+
.addHelpText("after", () => `\nDocs: https://docs.clawd.bot/cli/voicecall\n`);
|
|
46
49
|
|
|
47
50
|
root
|
|
48
51
|
.command("call")
|
package/src/config.ts
CHANGED
|
@@ -53,6 +53,14 @@ export const TwilioConfigSchema = z.object({
|
|
|
53
53
|
});
|
|
54
54
|
export type TwilioConfig = z.infer<typeof TwilioConfigSchema>;
|
|
55
55
|
|
|
56
|
+
/** Shared shape of a Plivo credential: a non-empty string that may be omitted. */
const plivoCredentialField = z.string().min(1).optional();

/**
 * Plivo provider credentials.
 *
 * Both fields are optional at the schema level; whether they are required for
 * the selected provider is decided by `validateProviderConfig`.
 */
export const PlivoConfigSchema = z.object({
  /** Plivo Auth ID (starts with MA/SA) */
  authId: plivoCredentialField,
  /** Plivo Auth Token */
  authToken: plivoCredentialField,
});

/** Parsed Plivo configuration, inferred from {@link PlivoConfigSchema}. */
export type PlivoConfig = z.infer<typeof PlivoConfigSchema>;
|
|
63
|
+
|
|
56
64
|
// -----------------------------------------------------------------------------
|
|
57
65
|
// STT/TTS Configuration
|
|
58
66
|
// -----------------------------------------------------------------------------
|
|
@@ -219,8 +227,8 @@ export const VoiceCallConfigSchema = z.object({
|
|
|
219
227
|
/** Enable voice call functionality */
|
|
220
228
|
enabled: z.boolean().default(false),
|
|
221
229
|
|
|
222
|
-
/** Active provider (telnyx, twilio, or mock) */
|
|
223
|
-
provider: z.enum(["telnyx", "twilio", "mock"]).optional(),
|
|
230
|
+
/** Active provider (telnyx, twilio, plivo, or mock) */
|
|
231
|
+
provider: z.enum(["telnyx", "twilio", "plivo", "mock"]).optional(),
|
|
224
232
|
|
|
225
233
|
/** Telnyx-specific configuration */
|
|
226
234
|
telnyx: TelnyxConfigSchema.optional(),
|
|
@@ -228,6 +236,9 @@ export const VoiceCallConfigSchema = z.object({
|
|
|
228
236
|
/** Twilio-specific configuration */
|
|
229
237
|
twilio: TwilioConfigSchema.optional(),
|
|
230
238
|
|
|
239
|
+
/** Plivo-specific configuration */
|
|
240
|
+
plivo: PlivoConfigSchema.optional(),
|
|
241
|
+
|
|
231
242
|
/** Phone number to call from (E.164) */
|
|
232
243
|
fromNumber: E164Schema.optional(),
|
|
233
244
|
|
|
@@ -351,5 +362,18 @@ export function validateProviderConfig(config: VoiceCallConfig): {
|
|
|
351
362
|
}
|
|
352
363
|
}
|
|
353
364
|
|
|
365
|
+
if (config.provider === "plivo") {
|
|
366
|
+
if (!config.plivo?.authId) {
|
|
367
|
+
errors.push(
|
|
368
|
+
"plugins.entries.voice-call.config.plivo.authId is required (or set PLIVO_AUTH_ID env)",
|
|
369
|
+
);
|
|
370
|
+
}
|
|
371
|
+
if (!config.plivo?.authToken) {
|
|
372
|
+
errors.push(
|
|
373
|
+
"plugins.entries.voice-call.config.plivo.authToken is required (or set PLIVO_AUTH_TOKEN env)",
|
|
374
|
+
);
|
|
375
|
+
}
|
|
376
|
+
}
|
|
377
|
+
|
|
354
378
|
return { valid: errors.length === 0, errors };
|
|
355
379
|
}
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import type { CallId, CallRecord } from "../types.js";
|
|
2
|
+
import type { VoiceCallConfig } from "../config.js";
|
|
3
|
+
import type { VoiceCallProvider } from "../providers/base.js";
|
|
4
|
+
|
|
5
|
+
/**
 * Pending request for the next final transcript of a call.
 *
 * NOTE(review): the code that creates and settles waiters lives in
 * `timers.ts`, which is not shown here — the field notes below are inferred
 * from the signatures and from how `events.ts` calls the waiter helpers.
 */
export type TranscriptWaiter = {
  /** Settles the wait with the transcript text. */
  resolve: (text: string) => void;
  /** Fails the wait, e.g. when the call ends or errors before speech arrives. */
  reject: (err: Error) => void;
  /** Timer handle associated with this wait (presumably its deadline — confirm in timers.ts). */
  timeout: NodeJS.Timeout;
};
|
|
10
|
+
|
|
11
|
+
/**
 * Mutable state shared by the call-manager helper modules
 * (events, lookup, outbound, state, store, timers).
 */
export type CallManagerContext = {
  /** Calls currently tracked, keyed by internal call id. */
  activeCalls: Map<CallId, CallRecord>;
  /** Provider-side call id -> internal call id. */
  providerCallIdMap: Map<string, CallId>;
  /** Ids of events already handled; used to drop duplicate deliveries. */
  processedEventIds: Set<string>;
  /** Active provider, or `null` while none is initialized. */
  provider: VoiceCallProvider | null;
  /** Parsed plugin configuration. */
  config: VoiceCallConfig;
  /** Location handed to `persistCallRecord` when a call record is saved. */
  storePath: string;
  /** Webhook URL passed to the provider for outbound calls; `null` when not configured. */
  webhookUrl: string | null;
  /** Per-call transcript waiters, keyed by internal call id. */
  transcriptWaiters: Map<CallId, TranscriptWaiter>;
  /** Per-call max-duration timers, keyed by internal call id. */
  maxDurationTimers: Map<CallId, NodeJS.Timeout>;
};
|
|
22
|
+
|
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
import crypto from "node:crypto";
|
|
2
|
+
|
|
3
|
+
import type { CallId, CallRecord, CallState, NormalizedEvent } from "../types.js";
|
|
4
|
+
import { TerminalStates } from "../types.js";
|
|
5
|
+
import type { CallManagerContext } from "./context.js";
|
|
6
|
+
import { findCall } from "./lookup.js";
|
|
7
|
+
import { addTranscriptEntry, transitionState } from "./state.js";
|
|
8
|
+
import { persistCallRecord } from "./store.js";
|
|
9
|
+
import {
|
|
10
|
+
clearMaxDurationTimer,
|
|
11
|
+
rejectTranscriptWaiter,
|
|
12
|
+
resolveTranscriptWaiter,
|
|
13
|
+
startMaxDurationTimer,
|
|
14
|
+
} from "./timers.js";
|
|
15
|
+
import { endCall } from "./outbound.js";
|
|
16
|
+
|
|
17
|
+
/**
 * Decide whether an inbound call may be accepted under the configured
 * inbound policy.
 *
 * - `disabled`: always rejected.
 * - `open`: always accepted.
 * - `allowlist` / `pairing`: accepted only when the caller's number matches an
 *   `allowFrom` entry.
 * - any other value: rejected.
 *
 * Numbers are compared on their digits only (formatting such as `+`, spaces,
 * dashes and parentheses is ignored) and must match exactly. A missing,
 * empty or non-numeric caller ID never matches: previously it normalized to
 * an empty string, which is a suffix of every allowlist entry and therefore
 * bypassed the allowlist. Suffix matching is likewise not used, because it
 * lets a caller whose number merely ends with (or is a tail of) an allowed
 * number through.
 *
 * @param config - Voice-call configuration (`inboundPolicy`, `allowFrom`).
 * @param from - Caller ID reported by the provider, if any.
 * @returns `true` when the call should be accepted.
 */
function shouldAcceptInbound(config: CallManagerContext["config"], from: string | undefined): boolean {
  const { inboundPolicy: policy, allowFrom } = config;

  switch (policy) {
    case "disabled":
      console.log("[voice-call] Inbound call rejected: policy is disabled");
      return false;

    case "open":
      console.log("[voice-call] Inbound call accepted: policy is open");
      return true;

    case "allowlist":
    case "pairing": {
      const normalized = from?.replace(/\D/g, "") ?? "";
      // An empty caller ID must never match; entries without digits normalize
      // to "" and therefore can never equal a non-empty caller ID either.
      const allowed =
        normalized.length > 0 &&
        (allowFrom ?? []).some((num) => num.replace(/\D/g, "") === normalized);
      const status = allowed ? "accepted" : "rejected";
      console.log(
        `[voice-call] Inbound call ${status}: ${from} ${allowed ? "is in" : "not in"} allowlist`,
      );
      return allowed;
    }

    default:
      return false;
  }
}
|
|
47
|
+
|
|
48
|
+
/**
 * Create, register and persist the record for a newly accepted inbound call.
 *
 * The record starts in the `ringing` state with a fresh UUID as its internal
 * id. It is added to `activeCalls`, indexed by its provider call id, and
 * written to the store before being returned.
 *
 * @param params.ctx - Call-manager state to register the call in.
 * @param params.providerCallId - Provider-side identifier of the call.
 * @param params.from - Caller number.
 * @param params.to - Called number.
 * @returns The newly created call record.
 */
function createInboundCall(params: {
  ctx: CallManagerContext;
  providerCallId: string;
  from: string;
  to: string;
}): CallRecord {
  const { ctx, providerCallId, from, to } = params;
  const callId = crypto.randomUUID();

  // Fall back to "twilio" when no provider instance is attached.
  const providerName = ctx.provider?.name || "twilio";
  // Greeting spoken once the call connects; configurable, with a default.
  const greeting = ctx.config.inboundGreeting || "Hello! How can I help you today?";

  const callRecord: CallRecord = {
    callId,
    providerCallId,
    provider: providerName,
    direction: "inbound",
    state: "ringing",
    from,
    to,
    startedAt: Date.now(),
    transcript: [],
    processedEventIds: [],
    metadata: {
      initialMessage: greeting,
    },
  };

  ctx.activeCalls.set(callId, callRecord);
  ctx.providerCallIdMap.set(providerCallId, callId);
  persistCallRecord(ctx.storePath, callRecord);

  console.log(`[voice-call] Created inbound call record: ${callId} from ${from}`);
  return callRecord;
}
|
|
79
|
+
|
|
80
|
+
/**
 * Apply one normalized provider event to the call-manager state.
 *
 * Steps, in order:
 * 1. Drop the event if its id was already processed; otherwise remember the id.
 * 2. Resolve the call by internal or provider call id. If none exists, an
 *    inbound event carrying a provider call id may create one, subject to the
 *    inbound policy; any other unknown-call event is ignored.
 * 3. Attach the provider call id to the record if it was not known yet.
 * 4. Update the record according to the event type (state transition,
 *    transcript, timers, teardown on end / non-retryable error).
 * 5. Persist the record.
 *
 * @param ctx - Call-manager state; mutated in place.
 * @param event - Normalized event. Its `callId` is rewritten to the internal
 *   id when the event creates a new inbound call.
 */
export function processEvent(ctx: CallManagerContext, event: NormalizedEvent): void {
  const { activeCalls, providerCallIdMap, processedEventIds } = ctx;

  if (processedEventIds.has(event.id)) return;
  processedEventIds.add(event.id);

  let call = findCall({
    activeCalls,
    providerCallIdMap,
    callIdOrProviderCallId: event.callId,
  });

  if (!call) {
    // Only an inbound event that carries a provider call id can create a call.
    if (event.direction !== "inbound" || !event.providerCallId) return;

    if (!shouldAcceptInbound(ctx.config, event.from)) {
      // TODO: Could hang up the call here.
      return;
    }

    call = createInboundCall({
      ctx,
      providerCallId: event.providerCallId,
      from: event.from || "unknown",
      to: event.to || ctx.config.fromNumber || "unknown",
    });

    // Normalize event to internal ID for downstream consumers.
    event.callId = call.callId;
  }

  const record: CallRecord = call;

  if (event.providerCallId && !record.providerCallId) {
    record.providerCallId = event.providerCallId;
    providerCallIdMap.set(event.providerCallId, record.callId);
  }

  record.processedEventIds.push(event.id);

  // Shared teardown for ended / failed calls: stop timers, fail any pending
  // transcript wait, and drop the call from the in-memory indexes.
  const release = (waiterMessage: string): void => {
    clearMaxDurationTimer(ctx, record.callId);
    rejectTranscriptWaiter(ctx, record.callId, waiterMessage);
    activeCalls.delete(record.callId);
    if (record.providerCallId) providerCallIdMap.delete(record.providerCallId);
  };

  switch (event.type) {
    case "call.initiated":
      transitionState(record, "initiated");
      break;

    case "call.ringing":
      transitionState(record, "ringing");
      break;

    case "call.answered":
      record.answeredAt = event.timestamp;
      transitionState(record, "answered");
      // Enforce the maximum call duration from the moment the call is answered.
      startMaxDurationTimer({
        ctx,
        callId: record.callId,
        onTimeout: async (callId) => {
          await endCall(ctx, callId);
        },
      });
      break;

    case "call.active":
      transitionState(record, "active");
      break;

    case "call.speaking":
      transitionState(record, "speaking");
      break;

    case "call.speech":
      // Only final transcripts are recorded and delivered to a waiting caller.
      if (event.isFinal) {
        addTranscriptEntry(record, "user", event.transcript);
        resolveTranscriptWaiter(ctx, record.callId, event.transcript);
      }
      transitionState(record, "listening");
      break;

    case "call.ended":
      record.endedAt = event.timestamp;
      record.endReason = event.reason;
      transitionState(record, event.reason as CallState);
      release(`Call ended: ${event.reason}`);
      break;

    case "call.error":
      // Retryable errors leave the call untouched.
      if (!event.retryable) {
        record.endedAt = event.timestamp;
        record.endReason = "error";
        transitionState(record, "error");
        release(`Call error: ${event.error}`);
      }
      break;
  }

  persistCallRecord(ctx.storePath, record);
}
|
|
178
|
+
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
import type { CallId, CallRecord } from "../types.js";
|
|
2
|
+
|
|
3
|
+
/**
 * Look up an active call by its provider-side call id.
 *
 * The provider-id index is consulted first; when it has an entry, the result
 * is whatever `activeCalls` holds for the mapped internal id (possibly
 * `undefined`). Only when the index has no entry are the active calls scanned
 * for a record whose `providerCallId` matches.
 *
 * @returns The matching call record, or `undefined` if none is found.
 */
export function getCallByProviderCallId(params: {
  activeCalls: Map<CallId, CallRecord>;
  providerCallIdMap: Map<string, CallId>;
  providerCallId: string;
}): CallRecord | undefined {
  const { activeCalls, providerCallIdMap, providerCallId } = params;

  const mappedCallId = providerCallIdMap.get(providerCallId);
  if (mappedCallId) return activeCalls.get(mappedCallId);

  // Index miss: fall back to a linear scan of the active calls.
  return [...activeCalls.values()].find((candidate) => candidate.providerCallId === providerCallId);
}
|
|
20
|
+
|
|
21
|
+
/**
 * Resolve a call from an identifier that may be either the internal call id
 * or the provider-side call id.
 *
 * The identifier is tried as an internal id first; if that finds nothing it
 * is treated as a provider call id.
 *
 * @returns The matching call record, or `undefined` if none is found.
 */
export function findCall(params: {
  activeCalls: Map<CallId, CallRecord>;
  providerCallIdMap: Map<string, CallId>;
  callIdOrProviderCallId: string;
}): CallRecord | undefined {
  const { activeCalls, providerCallIdMap, callIdOrProviderCallId: id } = params;

  return (
    activeCalls.get(id) ||
    getCallByProviderCallId({ activeCalls, providerCallIdMap, providerCallId: id })
  );
}
|
|
34
|
+
|
|
@@ -0,0 +1,247 @@
|
|
|
1
|
+
import crypto from "node:crypto";
|
|
2
|
+
|
|
3
|
+
import { TerminalStates, type CallId, type CallRecord, type OutboundCallOptions } from "../types.js";
|
|
4
|
+
import type { CallMode } from "../config.js";
|
|
5
|
+
import { mapVoiceToPolly } from "../voice-mapping.js";
|
|
6
|
+
import type { CallManagerContext } from "./context.js";
|
|
7
|
+
import { getCallByProviderCallId } from "./lookup.js";
|
|
8
|
+
import { generateNotifyTwiml } from "./twiml.js";
|
|
9
|
+
import { addTranscriptEntry, transitionState } from "./state.js";
|
|
10
|
+
import { persistCallRecord } from "./store.js";
|
|
11
|
+
import { clearMaxDurationTimer, clearTranscriptWaiter, rejectTranscriptWaiter, waitForFinalTranscript } from "./timers.js";
|
|
12
|
+
|
|
13
|
+
/**
 * Place an outbound call through the active provider.
 *
 * Pre-flight failures (no provider, no webhook URL, concurrency limit reached,
 * no caller number) return `success: false` with an empty `callId` and create
 * no record. Otherwise a call record is created and persisted in the
 * `initiated` state before the provider is contacted. If the provider call
 * throws, the record is marked `failed`, persisted and removed from the
 * active set, and the error message is returned together with the call id.
 *
 * @param ctx - Call-manager state.
 * @param to - Number to call.
 * @param sessionKey - Optional session key stored on the call record.
 * @param options - Outbound options, or a bare string used as the initial message.
 * @returns The call id and whether the provider accepted the call.
 */
export async function initiateCall(
  ctx: CallManagerContext,
  to: string,
  sessionKey?: string,
  options?: OutboundCallOptions | string,
): Promise<{ callId: CallId; success: boolean; error?: string }> {
  // Pre-flight rejections never allocate a call id.
  const rejected = (error: string) => ({ callId: "", success: false, error });

  let opts: OutboundCallOptions;
  if (typeof options === "string") {
    opts = { message: options };
  } else {
    opts = options ?? {};
  }
  const initialMessage = opts.message;
  const mode = opts.mode ?? ctx.config.outbound.defaultMode;

  const provider = ctx.provider;
  if (!provider) return rejected("Provider not initialized");

  const webhookUrl = ctx.webhookUrl;
  if (!webhookUrl) return rejected("Webhook URL not configured");

  const limit = ctx.config.maxConcurrentCalls;
  if (ctx.activeCalls.size >= limit) {
    return rejected(`Maximum concurrent calls (${limit}) reached`);
  }

  const callId = crypto.randomUUID();

  // The mock provider works without a configured number; real providers do not.
  let from = ctx.config.fromNumber;
  if (!from && provider.name === "mock") from = "+15550000000";
  if (!from) return rejected("fromNumber not configured");

  const callRecord: CallRecord = {
    callId,
    provider: provider.name,
    direction: "outbound",
    state: "initiated",
    from,
    to,
    sessionKey,
    startedAt: Date.now(),
    transcript: [],
    processedEventIds: [],
    metadata: initialMessage ? { initialMessage, mode } : { mode },
  };

  ctx.activeCalls.set(callId, callRecord);
  persistCallRecord(ctx.storePath, callRecord);

  try {
    // For notify mode with a message, use inline TwiML with <Say>.
    let inlineTwiml: string | undefined;
    if (mode === "notify" && initialMessage) {
      const pollyVoice = mapVoiceToPolly(ctx.config.tts.voice);
      inlineTwiml = generateNotifyTwiml(initialMessage, pollyVoice);
      console.log(`[voice-call] Using inline TwiML for notify mode (voice: ${pollyVoice})`);
    }

    const { providerCallId } = await provider.initiateCall({
      callId,
      from,
      to,
      webhookUrl,
      inlineTwiml,
    });

    callRecord.providerCallId = providerCallId;
    ctx.providerCallIdMap.set(providerCallId, callId);
    persistCallRecord(ctx.storePath, callRecord);

    return { callId, success: true };
  } catch (err) {
    // Record the failure, then drop the call from the in-memory indexes.
    callRecord.state = "failed";
    callRecord.endedAt = Date.now();
    callRecord.endReason = "failed";
    persistCallRecord(ctx.storePath, callRecord);

    ctx.activeCalls.delete(callId);
    if (callRecord.providerCallId) {
      ctx.providerCallIdMap.delete(callRecord.providerCallId);
    }

    const message = err instanceof Error ? err.message : String(err);
    return { callId, success: false, error: message };
  }
}
|
|
106
|
+
|
|
107
|
+
/**
 * Speak `text` to the remote party through the provider's TTS.
 *
 * The call is moved to the `speaking` state and the bot line is appended to
 * the transcript before the record is persisted, so the stored snapshot
 * already contains the line being spoken (previously the record was persisted
 * first and the new transcript entry only reached the store on some later
 * write).
 *
 * Failures are reported through the return value; this function does not
 * throw for provider errors.
 *
 * @param ctx - Call-manager state.
 * @param callId - Internal id of the call to speak on.
 * @param text - Text to synthesize.
 * @returns `{ success: true }`, or `{ success: false, error }` when the call is
 *   unknown, not connected, already ended, or the provider call fails.
 */
export async function speak(
  ctx: CallManagerContext,
  callId: CallId,
  text: string,
): Promise<{ success: boolean; error?: string }> {
  const call = ctx.activeCalls.get(callId);
  if (!call) return { success: false, error: "Call not found" };
  if (!ctx.provider || !call.providerCallId) return { success: false, error: "Call not connected" };
  if (TerminalStates.has(call.state)) return { success: false, error: "Call has ended" };

  try {
    transitionState(call, "speaking");
    // Record the utterance before persisting so the snapshot includes it.
    addTranscriptEntry(call, "bot", text);
    persistCallRecord(ctx.storePath, call);

    await ctx.provider.playTts({
      callId,
      providerCallId: call.providerCallId,
      text,
      voice: ctx.config.tts.voice,
    });

    return { success: true };
  } catch (err) {
    return { success: false, error: err instanceof Error ? err.message : String(err) };
  }
}
|
|
135
|
+
|
|
136
|
+
/**
 * Speak the initial message stored on a call, if there is one.
 *
 * The message is looked up via the provider call id, removed from the call's
 * metadata (and the record persisted) before it is spoken so it is not
 * repeated, and then played through `speak`. In `notify` mode a successful
 * playback schedules an automatic hang-up after
 * `config.outbound.notifyHangupDelaySec` seconds, provided the call is still
 * active and not in a terminal state at that point.
 *
 * @param ctx - Call-manager state.
 * @param providerCallId - Provider-side id of the call.
 */
export async function speakInitialMessage(
  ctx: CallManagerContext,
  providerCallId: string,
): Promise<void> {
  const { activeCalls, providerCallIdMap } = ctx;
  const call = getCallByProviderCallId({ activeCalls, providerCallIdMap, providerCallId });
  if (!call) {
    console.warn(`[voice-call] speakInitialMessage: no call found for ${providerCallId}`);
    return;
  }

  const { callId } = call;
  const initialMessage = call.metadata?.initialMessage as string | undefined;
  const mode = (call.metadata?.mode as CallMode) ?? "conversation";

  if (!initialMessage) {
    console.log(`[voice-call] speakInitialMessage: no initial message for ${callId}`);
    return;
  }

  // Clear so we don't speak it again if the provider reconnects.
  if (call.metadata) {
    delete call.metadata.initialMessage;
    persistCallRecord(ctx.storePath, call);
  }

  console.log(`[voice-call] Speaking initial message for call ${callId} (mode: ${mode})`);
  const outcome = await speak(ctx, callId, initialMessage);
  if (!outcome.success) {
    console.warn(`[voice-call] Failed to speak initial message: ${outcome.error}`);
    return;
  }

  if (mode !== "notify") return;

  const delaySec = ctx.config.outbound.notifyHangupDelaySec;
  console.log(`[voice-call] Notify mode: auto-hangup in ${delaySec}s for call ${callId}`);

  // Hang up only if the call is still tracked and has not already ended.
  const hangUpIfStillActive = async (): Promise<void> => {
    const currentCall = ctx.activeCalls.get(callId);
    if (!currentCall || TerminalStates.has(currentCall.state)) return;
    console.log(`[voice-call] Notify mode: hanging up call ${callId}`);
    await endCall(ctx, callId);
  };
  setTimeout(hangUpIfStillActive, delaySec * 1000);
}
|
|
183
|
+
|
|
184
|
+
/**
 * Run one conversational turn: speak `prompt`, listen, and return the caller's
 * final transcript.
 *
 * `speak` reports failure through its return value rather than by throwing,
 * so its result is checked explicitly; if the prompt could not be played the
 * turn is aborted with that error instead of listening for a reply to a
 * prompt the caller never heard.
 *
 * The transcript waiter for the call is always cleared on exit, whatever the
 * outcome.
 *
 * @param ctx - Call-manager state.
 * @param callId - Internal id of the call.
 * @param prompt - Text to speak before listening.
 * @returns `{ success: true, transcript }` on success, otherwise
 *   `{ success: false, error }`.
 */
export async function continueCall(
  ctx: CallManagerContext,
  callId: CallId,
  prompt: string,
): Promise<{ success: boolean; transcript?: string; error?: string }> {
  const call = ctx.activeCalls.get(callId);
  if (!call) return { success: false, error: "Call not found" };

  const provider = ctx.provider;
  const providerCallId = call.providerCallId;
  if (!provider || !providerCallId) return { success: false, error: "Call not connected" };
  if (TerminalStates.has(call.state)) return { success: false, error: "Call has ended" };

  try {
    const spoken = await speak(ctx, callId, prompt);
    if (!spoken.success) {
      return { success: false, error: spoken.error ?? "Failed to speak prompt" };
    }

    transitionState(call, "listening");
    persistCallRecord(ctx.storePath, call);

    await provider.startListening({ callId, providerCallId });

    const transcript = await waitForFinalTranscript(ctx, callId);

    // Best-effort: stop listening after final transcript.
    await provider.stopListening({ callId, providerCallId });

    return { success: true, transcript };
  } catch (err) {
    return { success: false, error: err instanceof Error ? err.message : String(err) };
  } finally {
    clearTranscriptWaiter(ctx, callId);
  }
}
|
|
214
|
+
|
|
215
|
+
/**
 * Hang up a call on behalf of the bot.
 *
 * After the provider confirms the hang-up, the record is marked `hangup-bot`,
 * persisted, its max-duration timer cleared, any pending transcript wait
 * rejected, and the call removed from the in-memory indexes. If the provider
 * call throws, the call is left untouched and the error is returned.
 *
 * @param ctx - Call-manager state.
 * @param callId - Internal id of the call to end.
 * @returns `{ success: true }` when the call was ended (or was already in a
 *   terminal state); otherwise `{ success: false, error }`.
 */
export async function endCall(
  ctx: CallManagerContext,
  callId: CallId,
): Promise<{ success: boolean; error?: string }> {
  const call = ctx.activeCalls.get(callId);
  if (!call) return { success: false, error: "Call not found" };

  const { provider } = ctx;
  const providerCallId = call.providerCallId;
  if (!provider || !providerCallId) return { success: false, error: "Call not connected" };

  // Already finished: nothing to do.
  if (TerminalStates.has(call.state)) return { success: true };

  try {
    await provider.hangupCall({ callId, providerCallId, reason: "hangup-bot" });

    call.state = "hangup-bot";
    call.endedAt = Date.now();
    call.endReason = "hangup-bot";
    persistCallRecord(ctx.storePath, call);

    clearMaxDurationTimer(ctx, callId);
    rejectTranscriptWaiter(ctx, callId, "Call ended: hangup-bot");

    ctx.activeCalls.delete(callId);
    if (call.providerCallId) ctx.providerCallIdMap.delete(call.providerCallId);

    return { success: true };
  } catch (err) {
    const message = err instanceof Error ? err.message : String(err);
    return { success: false, error: message };
  }
}
|
|
247
|
+
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
import { TerminalStates, type CallRecord, type CallState, type TranscriptEntry } from "../types.js";
|
|
2
|
+
|
|
3
|
+
/** States a live conversation may alternate between freely. */
const ConversationStates = new Set<CallState>(["speaking", "listening"]);

/** Non-terminal states in their normal forward progression. */
const StateOrder: readonly CallState[] = [
  "initiated",
  "ringing",
  "answered",
  "active",
  "speaking",
  "listening",
];

/**
 * Move `call` to `newState` if the transition is permitted; otherwise leave
 * the call unchanged.
 *
 * A transition is permitted when the call is not already in `newState` or in
 * a terminal state, and at least one of the following holds:
 * - `newState` is terminal;
 * - both the current and the new state are conversation states
 *   (speaking <-> listening, for multi-turn conversations);
 * - `newState` comes later than the current state in `StateOrder`.
 *
 * @param call - Call record to update in place.
 * @param newState - Requested state.
 */
export function transitionState(call: CallRecord, newState: CallState): void {
  const current = call.state;

  // Same state, or the call has already finished: nothing to do.
  if (current === newState || TerminalStates.has(current)) return;

  const endsCall = TerminalStates.has(newState);
  const isConversationTurn = ConversationStates.has(current) && ConversationStates.has(newState);
  const movesForward = StateOrder.indexOf(newState) > StateOrder.indexOf(current);

  if (endsCall || isConversationTurn || movesForward) {
    call.state = newState;
  }
}
|
|
37
|
+
|
|
38
|
+
/**
 * Append a finalized line to a call's transcript, timestamped with the
 * current time.
 *
 * @param call - Call record whose transcript is extended in place.
 * @param speaker - Who produced the line.
 * @param text - The spoken text.
 */
export function addTranscriptEntry(
  call: CallRecord,
  speaker: "bot" | "user",
  text: string,
): void {
  const recordedAt = Date.now();
  const line: TranscriptEntry = { timestamp: recordedAt, speaker, text, isFinal: true };
  call.transcript.push(line);
}
|
|
51
|
+
|