@clawdbot/voice-call 0.1.0 → 2026.1.16

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,89 @@
1
+ import fs from "node:fs";
2
+ import fsp from "node:fs/promises";
3
+ import path from "node:path";
4
+
5
+ import { CallRecordSchema, TerminalStates, type CallId, type CallRecord } from "../types.js";
6
+
7
/**
 * Append one call record to the `calls.jsonl` log inside `storePath`.
 *
 * The record is serialized as a single JSON line. The write is started but not
 * awaited, so this function returns before the data reaches disk; a failed
 * write is reported on `console.error` and otherwise ignored.
 *
 * @param storePath - Directory that contains `calls.jsonl`.
 * @param call - Record to serialize and append.
 */
export function persistCallRecord(storePath: string, call: CallRecord): void {
  const target = path.join(storePath, "calls.jsonl");
  const serialized = JSON.stringify(call) + "\n";

  // Intentionally not awaited: callers must never block on disk I/O.
  fsp.appendFile(target, serialized).catch((err) => {
    console.error("[voice-call] Failed to persist call record:", err);
  });
}
15
+
16
/**
 * Rebuild in-memory call state from the `calls.jsonl` log inside `storePath`.
 *
 * The log is read synchronously. Every line is parsed with `CallRecordSchema`;
 * lines that are blank, not valid JSON, or rejected by the schema are skipped.
 * When several lines share a `callId`, the last one wins. Calls whose state is
 * in `TerminalStates` are dropped from the result.
 *
 * @param storePath - Directory that contains `calls.jsonl`.
 * @returns `activeCalls` keyed by call ID, `providerCallIdMap` from provider
 *   call ID to call ID, and the union of `processedEventIds` of the returned
 *   calls. All three are empty when the log file does not exist.
 */
export function loadActiveCallsFromStore(storePath: string): {
  activeCalls: Map<CallId, CallRecord>;
  providerCallIdMap: Map<string, CallId>;
  processedEventIds: Set<string>;
} {
  const activeCalls = new Map<CallId, CallRecord>();
  const providerCallIdMap = new Map<string, CallId>();
  const processedEventIds = new Set<string>();
  const restored = { activeCalls, providerCallIdMap, processedEventIds };

  const logPath = path.join(storePath, "calls.jsonl");
  if (!fs.existsSync(logPath)) {
    return restored;
  }

  // Pass 1: collapse the append-only log to the latest snapshot per call.
  const latestById = new Map<CallId, CallRecord>();
  for (const rawLine of fs.readFileSync(logPath, "utf-8").split("\n")) {
    if (rawLine.trim() === "") continue;
    try {
      const record = CallRecordSchema.parse(JSON.parse(rawLine));
      latestById.set(record.callId, record);
    } catch {
      // Malformed or schema-invalid line: ignore it and keep going.
    }
  }

  // Pass 2: keep only calls that have not reached a terminal state.
  latestById.forEach((record, id) => {
    if (TerminalStates.has(record.state)) return;

    activeCalls.set(id, record);
    if (record.providerCallId) {
      providerCallIdMap.set(record.providerCallId, id);
    }
    for (const eventId of record.processedEventIds) {
      processedEventIds.add(eventId);
    }
  });

  return restored;
}
61
+
62
/**
 * Read the most recent call records from the `calls.jsonl` log.
 *
 * The last `limit` non-empty lines of the log are parsed with
 * `CallRecordSchema`; lines that are not valid JSON or fail the schema are
 * skipped, so fewer than `limit` records may be returned. Records are returned
 * in log order (oldest first).
 *
 * @param storePath - Directory that contains `calls.jsonl`.
 * @param limit - Maximum number of trailing log lines to consider. Values that
 *   are zero, negative, or NaN yield an empty result.
 * @returns The parsed records, or `[]` when the log file is not accessible.
 */
export async function getCallHistoryFromStore(
  storePath: string,
  limit = 50,
): Promise<CallRecord[]> {
  // `slice(-0)` is `slice(0)` (the whole array) and a negative limit would turn
  // into a positive start index, so reject non-positive and NaN limits here.
  if (!(limit > 0)) {
    return [];
  }

  const logPath = path.join(storePath, "calls.jsonl");

  try {
    await fsp.access(logPath);
  } catch {
    return [];
  }

  const content = await fsp.readFile(logPath, "utf-8");
  const lines = content.trim().split("\n").filter(Boolean);
  const calls: CallRecord[] = [];

  for (const line of lines.slice(-limit)) {
    try {
      calls.push(CallRecordSchema.parse(JSON.parse(line)));
    } catch {
      // Skip lines that are not valid JSON or do not match the schema.
    }
  }

  return calls;
}
89
+
@@ -0,0 +1,87 @@
1
+ import { TerminalStates, type CallId } from "../types.js";
2
+ import type { CallManagerContext } from "./context.js";
3
+ import { persistCallRecord } from "./store.js";
4
+
5
/**
 * Cancel the max-duration timer registered for `callId`, if there is one, and
 * remove it from `ctx.maxDurationTimers`. Does nothing when no timer is set.
 */
export function clearMaxDurationTimer(ctx: CallManagerContext, callId: CallId): void {
  const pending = ctx.maxDurationTimers.get(callId);
  if (!pending) return;

  clearTimeout(pending);
  ctx.maxDurationTimers.delete(callId);
}
12
+
13
/**
 * Arm (or re-arm) the max-duration timer for a call.
 *
 * Any timer already registered for the call is cleared first. When the timer
 * fires and the call is still present in `ctx.activeCalls` in a non-terminal
 * state, the call's `endReason` is set to `"timeout"`, the record is
 * persisted, and `onTimeout` is invoked.
 *
 * @param params.ctx - Call manager context holding config, timers and calls.
 * @param params.callId - Call the timer belongs to.
 * @param params.onTimeout - Invoked with the call ID once the duration limit is
 *   hit. A rejection or throw is logged here instead of escaping the timer
 *   callback as an unhandled promise rejection.
 */
export function startMaxDurationTimer(params: {
  ctx: CallManagerContext;
  callId: CallId;
  onTimeout: (callId: CallId) => Promise<void>;
}): void {
  clearMaxDurationTimer(params.ctx, params.callId);

  const maxDurationMs = params.ctx.config.maxDurationSeconds * 1000;
  console.log(
    `[voice-call] Starting max duration timer (${params.ctx.config.maxDurationSeconds}s) for call ${params.callId}`,
  );

  const timer = setTimeout(async () => {
    // The timer has fired, so its handle is no longer worth tracking.
    params.ctx.maxDurationTimers.delete(params.callId);

    const call = params.ctx.activeCalls.get(params.callId);
    if (!call || TerminalStates.has(call.state)) return;

    console.log(
      `[voice-call] Max duration reached (${params.ctx.config.maxDurationSeconds}s), ending call ${params.callId}`,
    );
    call.endReason = "timeout";
    persistCallRecord(params.ctx.storePath, call);

    try {
      await params.onTimeout(params.callId);
    } catch (err) {
      // Nothing awaits a setTimeout callback, so handle the failure here.
      console.error(
        `[voice-call] Max duration timeout handler failed for call ${params.callId}:`,
        err,
      );
    }
  }, maxDurationMs);

  params.ctx.maxDurationTimers.set(params.callId, timer);
}
40
+
41
/**
 * Drop the transcript waiter registered for `callId` without settling it:
 * its timeout is cancelled and the entry is removed from
 * `ctx.transcriptWaiters`. Does nothing when no waiter is registered.
 */
export function clearTranscriptWaiter(ctx: CallManagerContext, callId: CallId): void {
  const pending = ctx.transcriptWaiters.get(callId);
  if (pending) {
    clearTimeout(pending.timeout);
    ctx.transcriptWaiters.delete(callId);
  }
}
47
+
48
/**
 * Reject the transcript waiter registered for `callId`, if any.
 *
 * The waiter is unregistered (and its timeout cancelled) before its `reject`
 * callback is invoked with an `Error` carrying `reason`.
 *
 * @param ctx - Call manager context that owns `transcriptWaiters`.
 * @param callId - Call whose waiter should be rejected.
 * @param reason - Message for the rejection error.
 */
export function rejectTranscriptWaiter(
  ctx: CallManagerContext,
  callId: CallId,
  reason: string,
): void {
  const pending = ctx.transcriptWaiters.get(callId);
  if (pending) {
    clearTranscriptWaiter(ctx, callId);
    pending.reject(new Error(reason));
  }
}
58
+
59
/**
 * Resolve the transcript waiter registered for `callId`, if any.
 *
 * The waiter is unregistered (and its timeout cancelled) before its `resolve`
 * callback is invoked with `transcript`.
 *
 * @param ctx - Call manager context that owns `transcriptWaiters`.
 * @param callId - Call whose waiter should be resolved.
 * @param transcript - Value handed to the waiter.
 */
export function resolveTranscriptWaiter(
  ctx: CallManagerContext,
  callId: CallId,
  transcript: string,
): void {
  const pending = ctx.transcriptWaiters.get(callId);
  if (pending) {
    clearTranscriptWaiter(ctx, callId);
    pending.resolve(transcript);
  }
}
69
+
70
/**
 * Register a waiter for the transcript of `callId`.
 *
 * A waiter already registered for the same call is rejected with
 * "Transcript waiter replaced" first, so at most one waiter exists per call.
 * The returned promise settles when the registered `resolve`/`reject`
 * callbacks are invoked, or rejects after `ctx.config.transcriptTimeoutMs`.
 *
 * @returns A promise for the transcript string.
 */
export function waitForFinalTranscript(
  ctx: CallManagerContext,
  callId: CallId,
): Promise<string> {
  // Evict any previous waiter before installing the new one.
  rejectTranscriptWaiter(ctx, callId, "Transcript waiter replaced");

  const timeoutMs = ctx.config.transcriptTimeoutMs;
  return new Promise<string>((resolve, reject) => {
    const onExpire = (): void => {
      ctx.transcriptWaiters.delete(callId);
      reject(new Error(`Timed out waiting for transcript after ${timeoutMs}ms`));
    };

    ctx.transcriptWaiters.set(callId, {
      resolve,
      reject,
      timeout: setTimeout(onExpire, timeoutMs),
    });
  });
}
87
+
@@ -0,0 +1,10 @@
1
+ import { escapeXml } from "../voice-mapping.js";
2
+
3
/**
 * Build an XML response that speaks `message` with a `<Say>` element and then
 * hangs up.
 *
 * Both interpolated values are passed through `escapeXml`, so neither the
 * spoken text nor the `voice` attribute can break out of its position in the
 * document.
 * NOTE(review): this relies on `escapeXml` escaping double quotes as well as
 * `&`, `<` and `>` — confirm against voice-mapping.
 *
 * @param message - Text to speak.
 * @param voice - Value for the `voice` attribute of `<Say>`.
 * @returns The complete XML document as a string.
 */
export function generateNotifyTwiml(message: string, voice: string): string {
  return `<?xml version="1.0" encoding="UTF-8"?>
<Response>
  <Say voice="${escapeXml(voice)}">${escapeXml(message)}</Say>
  <Hangup/>
</Response>`;
}
10
+
@@ -0,0 +1,108 @@
1
+ import os from "node:os";
2
+ import path from "node:path";
3
+
4
+ import { describe, expect, it } from "vitest";
5
+
6
+ import { VoiceCallConfigSchema } from "./config.js";
7
+ import { CallManager } from "./manager.js";
8
+ import type {
9
+ HangupCallInput,
10
+ InitiateCallInput,
11
+ InitiateCallResult,
12
+ PlayTtsInput,
13
+ ProviderWebhookParseResult,
14
+ StartListeningInput,
15
+ StopListeningInput,
16
+ WebhookContext,
17
+ WebhookVerificationResult,
18
+ } from "./types.js";
19
+ import type { VoiceCallProvider } from "./providers/base.js";
20
+
21
/**
 * In-memory `VoiceCallProvider` stand-in for the tests below.
 *
 * It identifies itself as "plivo", accepts every webhook, reports a fixed
 * provider call ID on initiation, and records each `playTts` request in
 * `playTtsCalls` so tests can assert on what was spoken.
 */
class FakeProvider implements VoiceCallProvider {
  readonly name = "plivo" as const;

  /** Every input passed to `playTts`, in call order. */
  readonly playTtsCalls: PlayTtsInput[] = [];

  verifyWebhook(_ctx: WebhookContext): WebhookVerificationResult {
    // Webhooks are always treated as authentic.
    return { ok: true };
  }

  parseWebhookEvent(_ctx: WebhookContext): ProviderWebhookParseResult {
    // No events are ever produced from a webhook.
    return { events: [], statusCode: 200 };
  }

  async initiateCall(_input: InitiateCallInput): Promise<InitiateCallResult> {
    return { providerCallId: "request-uuid", status: "initiated" };
  }

  async hangupCall(_input: HangupCallInput): Promise<void> {
    // Intentionally a no-op.
  }

  async playTts(input: PlayTtsInput): Promise<void> {
    this.playTtsCalls.push(input);
  }

  async startListening(_input: StartListeningInput): Promise<void> {
    // Intentionally a no-op.
  }

  async stopListening(_input: StopListeningInput): Promise<void> {
    // Intentionally a no-op.
  }
}
41
+
42
describe("CallManager", () => {
  let storeCounter = 0;

  /**
   * Build a store path that is unique per call. `Date.now()` alone can repeat
   * when two tests start within the same millisecond, which would make them
   * share a store directory; the pid and a counter rule that out.
   */
  const makeStorePath = (): string => {
    storeCounter += 1;
    return path.join(
      os.tmpdir(),
      `clawdbot-voice-call-test-${process.pid}-${Date.now()}-${storeCounter}`,
    );
  };

  /** Minimal enabled Plivo configuration shared by the tests. */
  const makeConfig = () =>
    VoiceCallConfigSchema.parse({
      enabled: true,
      provider: "plivo",
      fromNumber: "+15550000000",
    });

  it("upgrades providerCallId mapping when provider ID changes", async () => {
    const manager = new CallManager(makeConfig(), makeStorePath());
    manager.initialize(new FakeProvider(), "https://example.com/voice/webhook");

    const { callId, success, error } = await manager.initiateCall("+15550000001");
    expect(success).toBe(true);
    expect(error).toBeUndefined();

    // The provider returned a request UUID as the initial providerCallId.
    expect(manager.getCall(callId)?.providerCallId).toBe("request-uuid");
    expect(manager.getCallByProviderCallId("request-uuid")?.callId).toBe(callId);

    // Provider later reports the actual call UUID.
    manager.processEvent({
      id: "evt-1",
      type: "call.answered",
      callId,
      providerCallId: "call-uuid",
      timestamp: Date.now(),
    });

    // The new ID must be mapped and the stale one must be gone.
    expect(manager.getCall(callId)?.providerCallId).toBe("call-uuid");
    expect(manager.getCallByProviderCallId("call-uuid")?.callId).toBe(callId);
    expect(manager.getCallByProviderCallId("request-uuid")).toBeUndefined();
  });

  it("speaks initial message on answered for notify mode (non-Twilio)", async () => {
    const provider = new FakeProvider();
    const manager = new CallManager(makeConfig(), makeStorePath());
    manager.initialize(provider, "https://example.com/voice/webhook");

    const { callId, success } = await manager.initiateCall(
      "+15550000002",
      undefined,
      { message: "Hello there", mode: "notify" },
    );
    expect(success).toBe(true);

    manager.processEvent({
      id: "evt-2",
      type: "call.answered",
      callId,
      providerCallId: "call-uuid",
      timestamp: Date.now(),
    });

    // The initial message is spoken without being awaited by processEvent;
    // yield once so that work can run before asserting.
    await new Promise((resolve) => setTimeout(resolve, 0));

    expect(provider.playTtsCalls).toHaveLength(1);
    expect(provider.playTtsCalls[0]?.text).toBe("Hello there");
  });
});
package/src/manager.ts CHANGED
@@ -580,8 +580,16 @@ export class CallManager {
580
580
  }
581
581
 
582
582
  // Update provider call ID if we got it
583
- if (event.providerCallId && !call.providerCallId) {
583
+ if (event.providerCallId && event.providerCallId !== call.providerCallId) {
584
+ const previousProviderCallId = call.providerCallId;
584
585
  call.providerCallId = event.providerCallId;
586
+ this.providerCallIdMap.set(event.providerCallId, call.callId);
587
+ if (previousProviderCallId) {
588
+ const mapped = this.providerCallIdMap.get(previousProviderCallId);
589
+ if (mapped === call.callId) {
590
+ this.providerCallIdMap.delete(previousProviderCallId);
591
+ }
592
+ }
585
593
  }
586
594
 
587
595
  // Track processed event
@@ -602,6 +610,9 @@ export class CallManager {
602
610
  this.transitionState(call, "answered");
603
611
  // Start max duration timer when call is answered
604
612
  this.startMaxDurationTimer(call.callId);
613
+ // Best-effort: speak initial message (for inbound greetings and outbound
614
+ // conversation mode) once the call is answered.
615
+ this.maybeSpeakInitialMessageOnAnswered(call);
605
616
  break;
606
617
 
607
618
  case "call.active":
@@ -653,6 +664,23 @@ export class CallManager {
653
664
  this.persistCallRecord(call);
654
665
  }
655
666
 
667
/**
 * Best-effort: start speaking the call's initial message.
 *
 * Nothing happens when `call.metadata.initialMessage` is missing, not a
 * string, or blank after trimming; when no provider is set; when the call has
 * no `providerCallId`; or when the provider is Twilio. Otherwise
 * `speakInitialMessage` is started and deliberately not awaited.
 */
private maybeSpeakInitialMessageOnAnswered(call: CallRecord): void {
  const rawMessage = call.metadata?.initialMessage;
  const initialMessage = typeof rawMessage === "string" ? rawMessage.trim() : "";
  if (initialMessage === "") return;

  const providerCallId = call.providerCallId;
  if (!this.provider || !providerCallId) return;

  // Twilio has provider-specific state for speaking (<Say> fallback) and can
  // fail for inbound calls; keep existing Twilio behavior unchanged.
  if (this.provider.name === "twilio") return;

  void this.speakInitialMessage(providerCallId);
}
683
+
656
684
  /**
657
685
  * Get an active call by ID.
658
686
  */
@@ -7,3 +7,4 @@ export {
7
7
  } from "./stt-openai-realtime.js";
8
8
  export { TelnyxProvider } from "./telnyx.js";
9
9
  export { TwilioProvider } from "./twilio.js";
10
+ export { PlivoProvider } from "./plivo.js";
@@ -0,0 +1,29 @@
1
+ import { describe, expect, it } from "vitest";
2
+
3
+ import { PlivoProvider } from "./plivo.js";
4
+
5
describe("PlivoProvider", () => {
  it("parses answer callback into call.answered and returns keep-alive XML", () => {
    const plivo = new PlivoProvider({
      authId: "MA000000000000000000",
      authToken: "test-token",
    });

    // Form-encoded answer callback; the internal call ID travels in the query.
    const parsed = plivo.parseWebhookEvent({
      headers: { host: "example.com" },
      rawBody:
        "CallUUID=call-uuid&CallStatus=in-progress&Direction=outbound&From=%2B15550000000&To=%2B15550000001&Event=StartApp",
      url: "https://example.com/voice/webhook?provider=plivo&flow=answer&callId=internal-call-id",
      method: "POST",
      query: { provider: "plivo", flow: "answer", callId: "internal-call-id" },
    });

    // Exactly one event, carrying both the internal and the provider call ID.
    expect(parsed.events).toHaveLength(1);
    const [answered] = parsed.events;
    expect(answered?.type).toBe("call.answered");
    expect(answered?.callId).toBe("internal-call-id");
    expect(answered?.providerCallId).toBe("call-uuid");

    // The response body must contain a <Wait> element with length="300".
    const responseXml = parsed.providerResponseBody;
    expect(responseXml).toContain("<Wait");
    expect(responseXml).toContain('length="300"');
  });
});
29
+