@aihumanity/voice-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/react.js ADDED
@@ -0,0 +1,572 @@
1
+ import { useRef, useState, useEffect, useCallback } from 'react';
2
+ import { UltravoxSession, UltravoxSessionStatus } from 'ultravox-client';
3
+
4
+ // src/react.ts
5
+
6
+ // src/client.ts
/**
 * Obtains the join payload for a new call.
 *
 * Resolution order:
 *  1. `opts.fetchJoinUrl()` when supplied — its result is returned untouched.
 *  2. `opts.publicKey` — POST with an `X-Public-Key` header
 *     (default path `/v1/voice/joinurl`).
 *  3. `opts.authToken` (value or function, may be async) — POST with an
 *     `Authorization: Bearer …` header (default path `/ultravox/secure/joinurl`).
 *
 * `opts.joinUrlPath` overrides the default path in both POST cases.
 *
 * @param {object} opts Call options (see the fields read below).
 * @returns {Promise<object>} Whatever `parseJoinUrlResponse` yields for the
 *   HTTP response, or the custom `opts.fetchJoinUrl()` result.
 * @throws {Error} When `apiUrl` is missing, or when neither `publicKey` nor a
 *   non-empty `authToken` is available.
 */
async function fetchJoinUrl(opts) {
  if (opts.fetchJoinUrl) {
    return opts.fetchJoinUrl();
  }
  if (!opts.apiUrl) {
    throw new Error("[aihumanity/voice-sdk] apiUrl is required when fetchJoinUrl is not provided.");
  }

  // Strip trailing slashes so base + path never yields a double slash.
  const baseUrl = opts.apiUrl.replace(/\/+$/, "");

  // extraJoinUrlBody is spread last, so it may override username/agentName.
  const body = {
    username: opts.username,
    agentName: opts.agentName,
    ...(opts.extraJoinUrlBody ?? {}),
  };

  if (opts.dataConnection) {
    const { websocketUrl, audioConfig, dataMessages } = opts.dataConnection;
    body.dataConnection = {
      websocketUrl,
      audioConfig: {
        sampleRate: audioConfig?.sampleRate ?? 16e3,
        channelMode: audioConfig?.channelMode ?? "CHANNEL_MODE_SEPARATED",
      },
      // Spread user-supplied flags first, then apply defaults for the two
      // most common flags so they're always present if not explicitly set.
      dataMessages: {
        ...(dataMessages ?? {}),
        userStartedSpeaking: dataMessages?.userStartedSpeaking ?? true,
        userStoppedSpeaking: dataMessages?.userStoppedSpeaking ?? true,
      },
    };
  }

  // Shared POST used by both auth modes; only the default path and the auth
  // header differ between them.
  const postJoinRequest = async (defaultPath, authHeaders) => {
    const path = opts.joinUrlPath ?? defaultPath;
    // A non-empty path without a leading slash would otherwise be glued onto
    // the host name ("https://hostv1/...").
    const normalizedPath = path === "" || path.startsWith("/") ? path : `/${path}`;
    const res = await fetch(baseUrl + normalizedPath, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        ...authHeaders,
      },
      body: JSON.stringify(body),
    });
    return parseJoinUrlResponse(res);
  };

  if (opts.publicKey) {
    return postJoinRequest("/v1/voice/joinurl", { "X-Public-Key": opts.publicKey });
  }

  const token = typeof opts.authToken === "function" ? await opts.authToken() : opts.authToken;
  if (!token) {
    throw new Error(
      "[aihumanity/voice-sdk] Provide publicKey (for browser-direct) or authToken (for server-side)."
    );
  }
  return postJoinRequest("/ultravox/secure/joinurl", { Authorization: `Bearer ${token}` });
}
/**
 * Validates a join-url HTTP response and returns its parsed JSON body.
 *
 * @param {{ ok: boolean, status: number, text: () => Promise<string> }} res
 *   A fetch-style response object.
 * @returns {Promise<object>} The parsed body, guaranteed to carry a truthy
 *   `joinUrl` property.
 * @throws {Error} When the status is not OK, the body is not valid JSON, or
 *   the parsed body has no `joinUrl`.
 */
async function parseJoinUrlResponse(res) {
  // Read the body as text first so it can be quoted in error messages.
  const text = await res.text();
  if (!res.ok) {
    throw new Error(
      `[aihumanity/voice-sdk] joinUrl request failed (${res.status}): ${text}`
    );
  }

  let parsed;
  try {
    parsed = JSON.parse(text);
  } catch {
    throw new Error(
      `[aihumanity/voice-sdk] joinUrl response was not valid JSON: ${text}`
    );
  }

  // `?.` because a literal `null` body is valid JSON; without it the property
  // read would raise a TypeError instead of the SDK error below.
  if (!parsed?.joinUrl) {
    throw new Error(
      "[aihumanity/voice-sdk] joinUrl response did not include `joinUrl`."
    );
  }
  return parsed;
}
88
+
89
+ // src/emitter.ts
/**
 * Minimal event emitter: a Map from event name to a Set of listeners.
 */
class TypedEmitter {
  constructor() {
    this.listeners = new Map();
  }

  /**
   * Registers `listener` for `event`.
   * @returns {() => void} Function that unregisters this listener.
   */
  on(event, listener) {
    let set = this.listeners.get(event);
    if (!set) {
      set = new Set();
      this.listeners.set(event, set);
    }
    set.add(listener);
    return () => this.off(event, listener);
  }

  /**
   * Unregisters `listener` from `event`. Also accepts the original function
   * that was passed to `once()`.
   */
  off(event, listener) {
    const set = this.listeners.get(event);
    if (!set) return;
    if (!set.delete(listener)) {
      // Not registered directly: it may have been registered through once(),
      // in which case the stored entry is a wrapper tagged with `.listener`.
      for (const fn of set) {
        if (fn.listener === listener) {
          set.delete(fn);
          break;
        }
      }
    }
    // Drop empty sets so event names do not accumulate in the map.
    if (set.size === 0) {
      this.listeners.delete(event);
    }
  }

  /**
   * Registers `listener` so that it runs at most once for `event`.
   * @returns {() => void} Function that unregisters it before it fires.
   */
  once(event, listener) {
    const wrapper = (payload) => {
      // Unregister before invoking so a re-entrant emit cannot re-run it.
      this.off(event, wrapper);
      listener(payload);
    };
    // Tag the wrapper so off(event, listener) can find it.
    wrapper.listener = listener;
    return this.on(event, wrapper);
  }

  /**
   * Calls every listener registered for `event` with `payload`. A listener
   * that throws is logged via console.error and does not stop the others.
   */
  emit(event, payload) {
    const set = this.listeners.get(event);
    if (!set) return;
    // Iterate over a snapshot so listeners may add/remove during dispatch.
    for (const fn of [...set]) {
      try {
        fn(payload);
      } catch (err) {
        console.error("[aihumanity/voice-sdk] listener error:", err);
      }
    }
  }

  /** Removes every listener for every event. */
  removeAllListeners() {
    this.listeners.clear();
  }
}
128
+
129
+ // src/types.ts
// Public call-status values, keyed by their upper-case names.
const CallStatus = {
  IDLE: "idle",
  CONNECTING: "connecting",
  CONNECTED: "connected",
  LISTENING: "listening",
  THINKING: "thinking",
  SPEAKING: "speaking",
  DISCONNECTING: "disconnecting",
  DISCONNECTED: "disconnected",
};
// Transcript speaker values, keyed by their upper-case names.
const Speaker = {
  USER: "user",
  AGENT: "agent",
};
146
+
147
+ // src/VoiceCall.ts
// Captures the word that follows "[EMOTION_CONTEXT] …:" in a data message.
const DEFAULT_EMOTION_PATTERN = /\[EMOTION_CONTEXT\][^:]*:\s*(\w+)/i;

// Agent phrasing that signals contact details were stored. The apostrophe
// class accepts both the straight (') and the typographic (\u2019) form; the
// previous class listed the straight apostrophe twice, so "I’ve …" never matched.
const CONTACT_SAVED_PATTERN = /I['\u2019]ve (noted|saved|got|recorded) your (contact|info|details|number|email)/i;

// Statuses during which the call counts as live.
const LIVE_STATUSES = new Set([
  "connected" /* CONNECTED */,
  "listening" /* LISTENING */,
  "thinking" /* THINKING */,
  "speaking" /* SPEAKING */,
]);
/**
 * One voice call backed by an ultravox-client session.
 *
 * Wraps join-url fetching, session lifecycle, transcript diffing, emotion
 * extraction/polling and mute controls behind an event API
 * (`status`, `raw_status`, `transcript`, `transcripts`, `emotion`,
 * `contact_saved`, `data_message`, `mic_muted`, `speaker_muted`, `warning`,
 * `error`, `ended`).
 */
class VoiceCall {
  constructor(opts) {
    this.emitter = new TypedEmitter();
    this.session = null;
    this._status = "idle" /* IDLE */;
    this._callId = null;
    this._sessionToken = null;
    this._transcripts = [];
    this._lastEmotion = null;
    this._contactSaved = false;
    this._starting = false;
    this._emotionMeta = null;
    this._pollTimer = null;
    // Bumped whenever a start() attempt begins or is cancelled; an attempt
    // whose captured value no longer matches must not touch state any more.
    this._generation = 0;
    this.opts = opts;
  }

  // ── Public read-only state ────────────────────────────────────────────────
  get status() {
    return this._status;
  }
  get callId() {
    return this._callId;
  }
  get sessionToken() {
    return this._sessionToken;
  }
  /** Shallow copy of the current transcript list. */
  get transcripts() {
    return this._transcripts.slice();
  }
  get lastEmotion() {
    return this._lastEmotion;
  }
  get contactSaved() {
    return this._contactSaved;
  }
  /** `false` when there is no session. */
  get isMicMuted() {
    return this.session?.isMicMuted ?? false;
  }
  /** `false` when there is no session. */
  get isSpeakerMuted() {
    return this.session?.isSpeakerMuted ?? false;
  }
  /** Server-reported wiring info from the join-url response, if any. */
  get emotionMeta() {
    return this._emotionMeta;
  }
  /** Underlying ultravox-client session. Use sparingly — for power users. */
  get rawSession() {
    return this.session;
  }

  // ── Event API ─────────────────────────────────────────────────────────────
  on(event, listener) {
    return this.emitter.on(event, listener);
  }
  off(event, listener) {
    this.emitter.off(event, listener);
  }
  once(event, listener) {
    return this.emitter.once(event, listener);
  }

  // ── Control ───────────────────────────────────────────────────────────────
  /**
   * Fetches a joinUrl from the backend, opens an Ultravox session, and starts
   * the call. Resolves once `joinCall` has been kicked off (the call goes
   * "live" asynchronously via status events).
   *
   * No-op while another start is pending or the status is neither idle nor
   * disconnected. Failures are reported through the `error` event and the
   * status returns to idle; the returned promise does not reject. If `end()`
   * or `dispose()` runs while the join-url request is pending, the attempt is
   * abandoned without creating a session.
   */
  async start() {
    if (this._starting) return;
    if (this._status !== "idle" /* IDLE */ && this._status !== "disconnected" /* DISCONNECTED */) {
      return;
    }
    this._starting = true;
    const generation = ++this._generation;
    this.resetMutableState();
    this.setStatus("connecting" /* CONNECTING */);

    let session = null;
    try {
      const payload = await fetchJoinUrl(this.opts);
      // Cancelled while the request was in flight: do not open a session.
      if (generation !== this._generation) return;

      this._callId = payload.callId ?? null;
      this._sessionToken = payload.sessionToken ?? null;
      this._emotionMeta = payload.emotion ?? null;
      this.surfaceEmotionWarnings(payload);

      session = new UltravoxSession({
        audioContext: this.opts.audioContext,
        additionalMessages: this.opts.additionalMessages,
      });
      this.session = session;
      this.attachSessionListeners(session);
      session.joinCall(payload.joinUrl);
    } catch (err) {
      // A cancelled attempt has already been reset by end()/dispose().
      if (generation !== this._generation) return;
      // Detach a session that failed to join so its events are ignored.
      if (session !== null && this.session === session) {
        this.session = null;
      }
      const e = err instanceof Error ? err : new Error(String(err));
      this.emitter.emit("error", e);
      this.setStatus("idle" /* IDLE */);
    } finally {
      // Only the attempt that still owns the flag may clear it; a newer
      // start() may already be running after a cancellation.
      if (generation === this._generation) {
        this._starting = false;
      }
    }
  }

  /**
   * Hangs up. With an active session, resolves once the session's
   * `leaveCall()` promise settles (errors go to the `error` event). With no
   * session but a pending `start()`, cancels that start and returns to idle.
   */
  async end() {
    if (!this.session) {
      if (this._starting) {
        this.cancelPendingStart();
        this.setStatus("idle" /* IDLE */);
      }
      return;
    }
    this.setStatus("disconnecting" /* DISCONNECTING */);
    try {
      await this.session.leaveCall();
    } catch (err) {
      this.emitter.emit(
        "error",
        err instanceof Error ? err : new Error(String(err))
      );
    }
  }

  // The mute setters are no-ops without a session so that emitted events never
  // disagree with the isMicMuted / isSpeakerMuted getters.
  muteMic() {
    if (!this.session) return;
    this.session.muteMic();
    this.emitter.emit("mic_muted", true);
  }
  unmuteMic() {
    if (!this.session) return;
    this.session.unmuteMic();
    this.emitter.emit("mic_muted", false);
  }
  /** @returns {boolean} The new muted state; `false` when there is no session. */
  toggleMicMute() {
    if (!this.session) return false;
    this.session.toggleMicMute();
    const muted = this.session.isMicMuted;
    this.emitter.emit("mic_muted", muted);
    return muted;
  }
  muteSpeaker() {
    if (!this.session) return;
    this.session.muteSpeaker();
    this.emitter.emit("speaker_muted", true);
  }
  unmuteSpeaker() {
    if (!this.session) return;
    this.session.unmuteSpeaker();
    this.emitter.emit("speaker_muted", false);
  }
  /** @returns {boolean} The new muted state; `false` when there is no session. */
  toggleSpeakerMute() {
    if (!this.session) return false;
    this.session.toggleSpeakerMute();
    const muted = this.session.isSpeakerMuted;
    this.emitter.emit("speaker_muted", muted);
    return muted;
  }

  /** Sends a text message into the call (no spoken audio from the user). */
  sendText(text, deferResponse = false) {
    this.session?.sendText(text, deferResponse);
  }
  /** Sends an arbitrary data message over Ultravox's data channel. */
  sendData(obj) {
    this.session?.sendData(obj);
  }

  /** Removes all listeners and aborts any active session or pending start. */
  dispose() {
    this.cancelPendingStart();
    this.stopEmotionPolling();
    // Best-effort teardown: nobody is left to report a leaveCall failure to.
    void this.session?.leaveCall().catch(() => {});
    this.session = null;
    this.emitter.removeAllListeners();
    // Assigned directly (not via setStatus): listeners were just removed.
    this._status = "idle" /* IDLE */;
  }

  // ── Internals ─────────────────────────────────────────────────────────────
  /** Invalidates an in-flight start() so it stops at its next checkpoint. */
  cancelPendingStart() {
    this._generation++;
    this._starting = false;
  }

  /** Clears per-call state before a new call begins. */
  resetMutableState() {
    this._callId = null;
    this._sessionToken = null;
    this._transcripts = [];
    this._lastEmotion = null;
    this._contactSaved = false;
    this._emotionMeta = null;
    this.stopEmotionPolling();
  }

  /**
   * Subscribes to the session's status, transcripts and experimental_message
   * events. Every handler is gated on `session` still being the current one,
   * so late events from a superseded session cannot clobber a newer call.
   */
  attachSessionListeners(session) {
    const ifCurrent = (handler) => (evt) => {
      if (this.session === session) handler(evt);
    };
    session.addEventListener("status", ifCurrent(() => this.handleStatusChange(session)));
    session.addEventListener("transcripts", ifCurrent(() => this.handleTranscripts(session)));
    session.addEventListener(
      "experimental_message",
      ifCurrent((evt) => this.handleDataMessage(evt))
    );
  }

  /** Maps the session's status onto the public status and reacts to edges. */
  handleStatusChange(session) {
    const raw = session.status;
    this.emitter.emit("raw_status", String(raw));
    const prevStatus = this._status;
    let next;
    switch (raw) {
      case UltravoxSessionStatus.CONNECTING:
        next = "connecting" /* CONNECTING */;
        break;
      // The session's IDLE is surfaced publicly as "connected".
      case UltravoxSessionStatus.IDLE:
        next = "connected" /* CONNECTED */;
        break;
      case UltravoxSessionStatus.LISTENING:
        next = "listening" /* LISTENING */;
        break;
      case UltravoxSessionStatus.THINKING:
        next = "thinking" /* THINKING */;
        break;
      case UltravoxSessionStatus.SPEAKING:
        next = "speaking" /* SPEAKING */;
        break;
      case UltravoxSessionStatus.DISCONNECTING:
        next = "disconnecting" /* DISCONNECTING */;
        break;
      case UltravoxSessionStatus.DISCONNECTED:
        next = "disconnected" /* DISCONNECTED */;
        break;
      default:
        // Unknown raw status: keep the current public status.
        next = this._status;
    }
    this.setStatus(next);

    // Start polling on the transition from a non-live to a live status.
    const wasLive = LIVE_STATUSES.has(prevStatus);
    const nowLive = LIVE_STATUSES.has(next);
    if (!wasLive && nowLive && this._callId && this.opts.pollEmotion) {
      this.startEmotionPolling();
    }

    if (next === "disconnected" /* DISCONNECTED */) {
      this.stopEmotionPolling();
      this.session = null;
      this.emitter.emit("ended", undefined);
      // Settle on idle so start() can be called again.
      this.setStatus("idle" /* IDLE */);
    }
  }

  /**
   * Replaces the transcript list with the session's current one, emits
   * `transcript` for each entry that is new or whose text/isFinal changed,
   * then `transcripts` with the full list, and `contact_saved` the first time
   * an agent transcript matches CONTACT_SAVED_PATTERN.
   */
  handleTranscripts(session) {
    const raw = session.transcripts ?? [];
    const mapped = raw.map(toPublicTranscript);
    const previous = this._transcripts;
    this._transcripts = mapped;
    for (let i = 0; i < mapped.length; i++) {
      const cur = mapped[i];
      if (!cur) continue;
      const prev = previous[i];
      if (!prev || prev.text !== cur.text || prev.isFinal !== cur.isFinal) {
        this.emitter.emit("transcript", cur);
      }
    }
    this.emitter.emit("transcripts", mapped);

    if (!this._contactSaved) {
      const saved = mapped.some(
        (t) => t.speaker === "agent" /* AGENT */ && t.text && CONTACT_SAVED_PATTERN.test(t.text)
      );
      if (saved) {
        this._contactSaved = true;
        this.emitter.emit("contact_saved", undefined);
      }
    }
  }

  /**
   * Emits `data_message` with the event's `message`, `data`, or the event
   * itself (first non-nullish), then looks for an emotion label in its text.
   */
  handleDataMessage(evt) {
    const raw = evt.message ?? evt.data ?? evt;
    this.emitter.emit("data_message", raw);
    const text = typeof raw === "string" ? raw : safeStringify(raw);
    // Nothing to scan if the payload could not be rendered as a string.
    if (typeof text !== "string") return;
    const pattern = this.opts.emotionPattern ?? DEFAULT_EMOTION_PATTERN;
    const match = text.match(pattern);
    if (match && match[1]) {
      const label = match[1].toLowerCase();
      this._lastEmotion = label;
      this.emitter.emit("emotion", { label, raw });
    }
  }

  /**
   * Polls `opts.pollEmotion(callId, sessionToken)` immediately and then every
   * `opts.emotionPollIntervalMs` (default 15000 ms), emitting `emotion` for
   * each truthy label.
   */
  startEmotionPolling() {
    this.stopEmotionPolling();
    if (!this.opts.pollEmotion || !this._callId) return;
    const callId = this._callId;
    const sessionToken = this._sessionToken ?? undefined;
    const interval = this.opts.emotionPollIntervalMs ?? 15e3;
    let timer = null;
    const poll = async () => {
      try {
        const label = await this.opts.pollEmotion(callId, sessionToken);
        // Polling was stopped or restarted while waiting: drop the result.
        if (this._pollTimer !== timer) return;
        if (label) {
          const normalized = label.toLowerCase();
          this._lastEmotion = normalized;
          this.emitter.emit("emotion", { label: normalized, raw: label });
        }
      } catch {
        // Polling is best-effort; a failed poll is simply retried next tick.
      }
    };
    // Register the timer before the first poll so the staleness check above
    // has a value to compare against.
    timer = setInterval(poll, interval);
    this._pollTimer = timer;
    void poll();
  }

  stopEmotionPolling() {
    if (this._pollTimer !== null) {
      clearInterval(this._pollTimer);
      this._pollTimer = null;
    }
  }

  /** Updates the status and emits `status`, but only when it changed. */
  setStatus(next) {
    if (next === this._status) return;
    this._status = next;
    this.emitter.emit("status", next);
  }

  /** Emits one `warning` per emotion wiring flag the payload reports as `false`. */
  surfaceEmotionWarnings(payload) {
    const e = payload.emotion;
    if (!e) return;
    const checks = [
      ["dataConnectionEnabled", "dataConnection not enabled \u2014 check emotion WebSocket URL."],
      ["audioEnabled", "audio not enabled \u2014 audioConfig missing on backend request."],
      ["emotionBridgeConfigured", "emotionBridge not configured on the backend."],
      ["autoSendEnabled", "auto-send not enabled \u2014 emotion will not feed back into the agent."],
    ];
    for (const [flag, message] of checks) {
      // Strictly `false`: a missing flag is not treated as a problem.
      if (e[flag] === false) {
        this.emitter.emit("warning", message);
      }
    }
  }
}
/**
 * Copies the five public transcript fields (text, isFinal, speaker, medium,
 * ordinal) from `t` into a fresh plain object.
 */
function toPublicTranscript(t) {
  const { text, isFinal, speaker, medium, ordinal } = t;
  return { text, isFinal, speaker, medium, ordinal };
}
/**
 * Renders any value as a string without throwing on values JSON cannot encode.
 *
 * @param {*} v Value to render.
 * @returns {string} `JSON.stringify(v)` when that yields a string, otherwise
 *   `String(v)`.
 */
function safeStringify(v) {
  try {
    // JSON.stringify returns undefined (not a string) for undefined,
    // functions and symbols, so fall back to String() for those too.
    return JSON.stringify(v) ?? String(v);
  } catch {
    // Thrown for e.g. circular structures and BigInt values.
    return String(v);
  }
}
483
+
484
+ // src/react.ts
// Statuses for which the hook reports `isLive: true`.
const LIVE_STATES = new Set();
for (const liveStatus of ["connected", "listening", "thinking", "speaking"]) {
  LIVE_STATES.add(liveStatus);
}
/**
 * React hook that owns one `VoiceCall` for the lifetime of the component and
 * mirrors its state into React state.
 *
 * The call is created once, from the options of the first render; only
 * `authToken` is re-read from the latest options on every use. The call is
 * disposed when the component unmounts.
 *
 * @param {object} options Options forwarded to the `VoiceCall` constructor.
 * @returns {object} Current call state (`status`, `isBusy`, `isLive`,
 *   `transcripts`, `lastEmotion`, `contactSaved`, `micMuted`, `speakerMuted`,
 *   `warning`, `error`, `callId`, `sessionToken`), stable control callbacks
 *   (`start`, `end`, `toggleMicMute`, `toggleSpeakerMute`, `sendText`) and the
 *   underlying `call`.
 */
function useVoiceCall(options) {
  // Always points at the options of the most recent render.
  const optsRef = useRef(options);
  optsRef.current = options;

  const callRef = useRef(null);
  if (callRef.current === null) {
    callRef.current = new VoiceCall({
      ...options,
      // Resolve the token from the latest options at call time, whether it is
      // a plain value or a (possibly async) function, so a token that changes
      // between renders — or switches between the two forms — is honoured.
      authToken: () => {
        const latest = optsRef.current;
        return typeof latest.authToken === "function" ? latest.authToken() : latest.authToken;
      },
    });
  }
  const call = callRef.current;

  const [status, setStatus] = useState(call.status);
  const [transcripts, setTranscripts] = useState(call.transcripts);
  const [lastEmotion, setLastEmotion] = useState(call.lastEmotion);
  const [contactSaved, setContactSaved] = useState(call.contactSaved);
  const [micMuted, setMicMuted] = useState(call.isMicMuted);
  const [speakerMuted, setSpeakerMuted] = useState(call.isSpeakerMuted);
  const [warning, setWarning] = useState(null);
  const [error, setError] = useState(null);
  const [callId, setCallId] = useState(call.callId);
  const [sessionToken, setSessionToken] = useState(call.sessionToken);

  useEffect(() => {
    const offs = [
      call.on("status", (s) => {
        setStatus(s);
        setCallId(call.callId);
        setSessionToken(call.sessionToken);
        if (s === "connecting" /* CONNECTING */) {
          // A new call is starting: re-read the per-call state from the call
          // so values from the previous call (notably contactSaved, which is
          // otherwise only ever set to true) do not linger.
          setTranscripts(call.transcripts);
          setLastEmotion(call.lastEmotion);
          setContactSaved(call.contactSaved);
          setMicMuted(call.isMicMuted);
          setSpeakerMuted(call.isSpeakerMuted);
        }
      }),
      call.on("transcripts", (all) => setTranscripts(all)),
      call.on("emotion", (e) => setLastEmotion(e.label)),
      call.on("contact_saved", () => setContactSaved(true)),
      call.on("mic_muted", (m) => setMicMuted(m)),
      call.on("speaker_muted", (m) => setSpeakerMuted(m)),
      call.on("warning", (w) => setWarning(w)),
      call.on("error", (e) => setError(e)),
    ];
    return () => {
      for (const off of offs) off();
    };
  }, [call]);

  // Tear the call down on unmount.
  useEffect(() => {
    return () => {
      call.dispose();
    };
  }, [call]);

  const start = useCallback(() => {
    // Clear feedback left over from the previous attempt.
    setError(null);
    setWarning(null);
    return call.start();
  }, [call]);
  const end = useCallback(() => call.end(), [call]);
  const toggleMicMute = useCallback(() => call.toggleMicMute(), [call]);
  const toggleSpeakerMute = useCallback(() => call.toggleSpeakerMute(), [call]);
  const sendText = useCallback(
    (text, deferResponse) => call.sendText(text, deferResponse),
    [call]
  );

  return {
    status,
    isBusy: status === "connecting" /* CONNECTING */ || status === "disconnecting" /* DISCONNECTING */,
    isLive: LIVE_STATES.has(status),
    transcripts,
    lastEmotion,
    contactSaved,
    micMuted,
    speakerMuted,
    warning,
    error,
    callId,
    sessionToken,
    start,
    end,
    toggleMicMute,
    toggleSpeakerMute,
    sendText,
    call,
  };
}
569
+
570
+ export { CallStatus, Speaker, VoiceCall, useVoiceCall };
571
+ //# sourceMappingURL=react.js.map
572
+ //# sourceMappingURL=react.js.map