@aihumanity/voice-sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +486 -0
- package/dist/VoiceCall-_BBARIQT.d.ts +276 -0
- package/dist/index.d.ts +19 -0
- package/dist/index.js +485 -0
- package/dist/index.js.map +1 -0
- package/dist/react.d.ts +60 -0
- package/dist/react.js +572 -0
- package/dist/react.js.map +1 -0
- package/dist/widget.d.ts +68 -0
- package/dist/widget.js +781 -0
- package/dist/widget.js.map +1 -0
- package/package.json +87 -0
|
@@ -0,0 +1,276 @@
|
|
|
1
|
+
import { UltravoxSession } from 'ultravox-client';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Public types shared across the SDK.
|
|
5
|
+
*
|
|
6
|
+
* Status names are aligned with Ultravox's session-status enum so the SDK is
|
|
7
|
+
* a thin, semantic wrapper.
|
|
8
|
+
*/
|
|
9
|
+
/** Coarse-grained call lifecycle states the SDK exposes to consumers. */
|
|
10
|
+
declare enum CallStatus {
|
|
11
|
+
/** No active call; ready to start. */
|
|
12
|
+
IDLE = "idle",
|
|
13
|
+
/** Backend is being asked for a joinUrl, or WebRTC is connecting. */
|
|
14
|
+
CONNECTING = "connecting",
|
|
15
|
+
/** Call is live; agent is ready to listen. */
|
|
16
|
+
CONNECTED = "connected",
|
|
17
|
+
/** Microphone is open and capturing user speech. */
|
|
18
|
+
LISTENING = "listening",
|
|
19
|
+
/** Agent is processing the user's last utterance. */
|
|
20
|
+
THINKING = "thinking",
|
|
21
|
+
/** Agent is speaking. */
|
|
22
|
+
SPEAKING = "speaking",
|
|
23
|
+
/** Call is being torn down. */
|
|
24
|
+
DISCONNECTING = "disconnecting",
|
|
25
|
+
/** Call ended (terminal — same as IDLE for new calls). */
|
|
26
|
+
DISCONNECTED = "disconnected"
|
|
27
|
+
}
|
|
28
|
+
/** Speaker role on a transcript line. Mirrors ultravox-client's `Role`. */
|
|
29
|
+
declare enum Speaker {
|
|
30
|
+
USER = "user",
|
|
31
|
+
AGENT = "agent"
|
|
32
|
+
}
|
|
33
|
+
interface Transcript {
|
|
34
|
+
/** Spoken text. May grow over time as more partials arrive. */
|
|
35
|
+
text: string;
|
|
36
|
+
/** Whether this is the final, locked-in version of the utterance. */
|
|
37
|
+
isFinal: boolean;
|
|
38
|
+
/** Who spoke. */
|
|
39
|
+
speaker: Speaker;
|
|
40
|
+
/** voice or text. */
|
|
41
|
+
medium: "voice" | "text";
|
|
42
|
+
/** Sequence number for ordering. */
|
|
43
|
+
ordinal: number;
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Audio format the data-connection (emotion / analytics) WebSocket expects.
|
|
47
|
+
* Matches the Ultravox `dataConnection.audioConfig` schema.
|
|
48
|
+
*/
|
|
49
|
+
interface AudioConfig {
|
|
50
|
+
/** PCM sample rate. Default 16000. */
|
|
51
|
+
sampleRate?: number;
|
|
52
|
+
/**
|
|
53
|
+
* "CHANNEL_MODE_SEPARATED" sends user/agent as separate channels;
|
|
54
|
+
* "CHANNEL_MODE_MIXED" sends a single mixed stream.
|
|
55
|
+
*/
|
|
56
|
+
channelMode?: "CHANNEL_MODE_SEPARATED" | "CHANNEL_MODE_MIXED";
|
|
57
|
+
}
|
|
58
|
+
/** Which speech-activity events the data WebSocket should receive. */
|
|
59
|
+
interface DataMessageFlags {
|
|
60
|
+
userStartedSpeaking?: boolean;
|
|
61
|
+
userStoppedSpeaking?: boolean;
|
|
62
|
+
agentStartedSpeaking?: boolean;
|
|
63
|
+
agentStoppedSpeaking?: boolean;
|
|
64
|
+
}
|
|
65
|
+
/**
|
|
66
|
+
* Optional data-connection block. When provided the eimi backend will open a
|
|
67
|
+
* server-to-server WebSocket so user audio bytes can be analyzed (e.g. for
|
|
68
|
+
* vocal emotion) and the result fed back as a data message.
|
|
69
|
+
*/
|
|
70
|
+
interface DataConnectionConfig {
|
|
71
|
+
/** wss:// URL the backend should open for raw user audio. */
|
|
72
|
+
websocketUrl: string;
|
|
73
|
+
audioConfig?: AudioConfig;
|
|
74
|
+
dataMessages?: DataMessageFlags;
|
|
75
|
+
}
|
|
76
|
+
/** Options for starting a call. */
|
|
77
|
+
interface VoiceCallOptions {
|
|
78
|
+
/**
|
|
79
|
+
* Base URL of the eimi backend. Example: "https://api.eimi.ai".
|
|
80
|
+
* Required unless `fetchJoinUrl` is provided.
|
|
81
|
+
*/
|
|
82
|
+
apiUrl?: string;
|
|
83
|
+
/**
|
|
84
|
+
* Bearer token used to authenticate with eimi backend. Can be a string or a
|
|
85
|
+
* function that resolves one (handy for short-lived JWTs you fetch from
|
|
86
|
+
* your own backend). Required unless `fetchJoinUrl` or `publicKey` is provided.
|
|
87
|
+
*/
|
|
88
|
+
authToken?: string | (() => string | Promise<string>);
|
|
89
|
+
/**
|
|
90
|
+
* Publishable API key ID for browser-direct (no-backend) auth.
|
|
91
|
+
*
|
|
92
|
+
* When provided the SDK sends `X-Public-Key: <publicKey>` and relies on the
|
|
93
|
+
* browser's `Origin` header for domain validation on the server. The server
|
|
94
|
+
* checks the origin against the developer's registered `allowedOrigins` list.
|
|
95
|
+
*
|
|
96
|
+
* Use this when you have no server-side proxy. Register your site's origin in
|
|
97
|
+
* the developer portal first. Hits `/v1/voice/joinurl` by default (override
|
|
98
|
+
* with `joinUrlPath`).
|
|
99
|
+
*
|
|
100
|
+
* Unlike `authToken`, this key ID is safe to embed in browser JS — it grants
|
|
101
|
+
* no access outside your registered origins.
|
|
102
|
+
*/
|
|
103
|
+
publicKey?: string;
|
|
104
|
+
/** Agent name configured in eimi backend, e.g. "DavidChiu". */
|
|
105
|
+
agentName?: string;
|
|
106
|
+
/** Username the call should be billed/attributed to. */
|
|
107
|
+
username?: string;
|
|
108
|
+
/** Optional emotion / analytics WebSocket config. */
|
|
109
|
+
dataConnection?: DataConnectionConfig;
|
|
110
|
+
/**
|
|
111
|
+
* Override the join-url path. Defaults to "/ultravox/secure/joinurl".
|
|
112
|
+
* Useful if your deployment routes through a proxy.
|
|
113
|
+
*/
|
|
114
|
+
joinUrlPath?: string;
|
|
115
|
+
/**
|
|
116
|
+
* Optional fully custom fetcher. If provided, the SDK calls this function
|
|
117
|
+
* instead of building a request itself. Must resolve to `{ joinUrl, callId? }`.
|
|
118
|
+
* When this is provided, `apiUrl` and `authToken` are not required.
|
|
119
|
+
*/
|
|
120
|
+
fetchJoinUrl?: () => Promise<JoinUrlResponse>;
|
|
121
|
+
/** Extra fields forwarded in the join-url request body. */
|
|
122
|
+
extraJoinUrlBody?: Record<string, unknown>;
|
|
123
|
+
/**
|
|
124
|
+
* Pattern used to extract an emotion label out of `experimental_message`
|
|
125
|
+
* data payloads. Default looks for "[EMOTION_CONTEXT] ... : <label>".
|
|
126
|
+
* The first capture group becomes the emotion label.
|
|
127
|
+
*/
|
|
128
|
+
emotionPattern?: RegExp;
|
|
129
|
+
/**
|
|
130
|
+
* Optional polling callback for emotion data. When provided, the SDK polls
|
|
131
|
+
* this function at `emotionPollIntervalMs` while the call is live.
|
|
132
|
+
* Use this when emotion is injected server-side (not via experimental_message).
|
|
133
|
+
* Receives the `callId` and optional `sessionToken` from the join response.
|
|
134
|
+
* Should return the emotion label string or null.
|
|
135
|
+
*/
|
|
136
|
+
pollEmotion?: (callId: string, sessionToken?: string) => Promise<string | null>;
|
|
137
|
+
/**
|
|
138
|
+
* How often (ms) to call `pollEmotion`. Default 15000 (15 s).
|
|
139
|
+
* Ignored if `pollEmotion` is not set.
|
|
140
|
+
*/
|
|
141
|
+
emotionPollIntervalMs?: number;
|
|
142
|
+
/** AudioContext to reuse. The SDK will create one if omitted. */
|
|
143
|
+
audioContext?: AudioContext;
|
|
144
|
+
/** Pass-through to ultravox-client. */
|
|
145
|
+
additionalMessages?: Set<string>;
|
|
146
|
+
}
|
|
147
|
+
/** Whatever the eimi backend (or your custom fetcher) returns. */
|
|
148
|
+
interface JoinUrlResponse {
|
|
149
|
+
joinUrl: string;
|
|
150
|
+
callId?: string;
|
|
151
|
+
/**
|
|
152
|
+
* Short-lived token scoped to this call. Returned by eimi backend when
|
|
153
|
+
* session-based auth is configured. Used by `pollEmotion` for direct
|
|
154
|
+
* browser→backend calls without needing a service token.
|
|
155
|
+
*/
|
|
156
|
+
sessionToken?: string;
|
|
157
|
+
/**
|
|
158
|
+
* Server-side summary of whether the data connection / emotion bridge was
|
|
159
|
+
* wired up. Surface mirrors what eimi backend currently returns.
|
|
160
|
+
*/
|
|
161
|
+
emotion?: {
|
|
162
|
+
dataConnectionEnabled?: boolean;
|
|
163
|
+
audioEnabled?: boolean;
|
|
164
|
+
emotionBridgeConfigured?: boolean;
|
|
165
|
+
autoSendEnabled?: boolean;
|
|
166
|
+
[k: string]: unknown;
|
|
167
|
+
};
|
|
168
|
+
[k: string]: unknown;
|
|
169
|
+
}
|
|
170
|
+
/** Map of event name -> listener payload type for the VoiceCall emitter. */
|
|
171
|
+
interface VoiceCallEvents {
|
|
172
|
+
/** Coarse call status changed. Always fires on real transitions. */
|
|
173
|
+
status: CallStatus;
|
|
174
|
+
/** Underlying Ultravox status changed (kept for power users). */
|
|
175
|
+
raw_status: string;
|
|
176
|
+
/** A transcript was added or updated. */
|
|
177
|
+
transcript: Transcript;
|
|
178
|
+
/** Full transcript array snapshot, fired after every transcript update. */
|
|
179
|
+
transcripts: Transcript[];
|
|
180
|
+
/** Vocal emotion label extracted from a data message or poll. */
|
|
181
|
+
emotion: {
|
|
182
|
+
label: string;
|
|
183
|
+
raw: unknown;
|
|
184
|
+
};
|
|
185
|
+
/** Any data message from the agent / data connection. */
|
|
186
|
+
data_message: unknown;
|
|
187
|
+
/** Mic mute state changed. */
|
|
188
|
+
mic_muted: boolean;
|
|
189
|
+
/** Speaker mute state changed. */
|
|
190
|
+
speaker_muted: boolean;
|
|
191
|
+
/** Agent has saved/persisted contact info (heuristic on transcript). */
|
|
192
|
+
contact_saved: void;
|
|
193
|
+
/** A non-fatal warning (e.g., emotion bridge not configured). */
|
|
194
|
+
warning: string;
|
|
195
|
+
/** A fatal error during start/operation. */
|
|
196
|
+
error: Error;
|
|
197
|
+
/** Call has fully ended. */
|
|
198
|
+
ended: void;
|
|
199
|
+
}
|
|
200
|
+
/** Type of listener for a given VoiceCall event. */
|
|
201
|
+
type VoiceCallListener<K extends keyof VoiceCallEvents> = (payload: VoiceCallEvents[K]) => void;
|
|
202
|
+
|
|
203
|
+
/**
|
|
204
|
+
* High-level voice call wrapper.
|
|
205
|
+
*
|
|
206
|
+
* ```ts
|
|
207
|
+
* const call = new VoiceCall({ apiUrl, authToken, agentName, username });
|
|
208
|
+
* call.on("status", s => console.log(s));
|
|
209
|
+
* call.on("transcript", t => console.log(t.speaker, t.text));
|
|
210
|
+
* call.on("emotion", e => console.log("emotion:", e.label));
|
|
211
|
+
* await call.start();
|
|
212
|
+
* // ... later
|
|
213
|
+
* await call.end();
|
|
214
|
+
* ```
|
|
215
|
+
*/
|
|
216
|
+
declare class VoiceCall {
|
|
217
|
+
private readonly emitter;
|
|
218
|
+
private readonly opts;
|
|
219
|
+
private session;
|
|
220
|
+
private _status;
|
|
221
|
+
private _callId;
|
|
222
|
+
private _sessionToken;
|
|
223
|
+
private _transcripts;
|
|
224
|
+
private _lastEmotion;
|
|
225
|
+
private _contactSaved;
|
|
226
|
+
private _starting;
|
|
227
|
+
private _emotionMeta;
|
|
228
|
+
private _pollTimer;
|
|
229
|
+
constructor(opts: VoiceCallOptions);
|
|
230
|
+
get status(): CallStatus;
|
|
231
|
+
get callId(): string | null;
|
|
232
|
+
get sessionToken(): string | null;
|
|
233
|
+
get transcripts(): Transcript[];
|
|
234
|
+
get lastEmotion(): string | null;
|
|
235
|
+
get contactSaved(): boolean;
|
|
236
|
+
get isMicMuted(): boolean;
|
|
237
|
+
get isSpeakerMuted(): boolean;
|
|
238
|
+
/** Server-reported wiring info from the join-url response, if any. */
|
|
239
|
+
get emotionMeta(): JoinUrlResponse["emotion"] | null;
|
|
240
|
+
/** Underlying ultravox-client session. Use sparingly — for power users. */
|
|
241
|
+
get rawSession(): UltravoxSession | null;
|
|
242
|
+
on<K extends keyof VoiceCallEvents>(event: K, listener: VoiceCallListener<K>): () => void;
|
|
243
|
+
off<K extends keyof VoiceCallEvents>(event: K, listener: VoiceCallListener<K>): void;
|
|
244
|
+
once<K extends keyof VoiceCallEvents>(event: K, listener: VoiceCallListener<K>): () => void;
|
|
245
|
+
/**
|
|
246
|
+
* Fetches a joinUrl from the backend, opens an Ultravox session, and starts
|
|
247
|
+
* the call. Resolves once `joinCall` has been kicked off (the call goes
|
|
248
|
+
* "live" asynchronously via status events).
|
|
249
|
+
*/
|
|
250
|
+
start(): Promise<void>;
|
|
251
|
+
/** Hangs up. Resolves when ultravox-client confirms disconnection. */
|
|
252
|
+
end(): Promise<void>;
|
|
253
|
+
muteMic(): void;
|
|
254
|
+
unmuteMic(): void;
|
|
255
|
+
toggleMicMute(): boolean;
|
|
256
|
+
muteSpeaker(): void;
|
|
257
|
+
unmuteSpeaker(): void;
|
|
258
|
+
toggleSpeakerMute(): boolean;
|
|
259
|
+
/** Sends a text message into the call (no spoken audio from the user). */
|
|
260
|
+
sendText(text: string, deferResponse?: boolean): void;
|
|
261
|
+
/** Sends an arbitrary data message over Ultravox's data channel. */
|
|
262
|
+
sendData(obj: unknown): void;
|
|
263
|
+
/** Removes all listeners and aborts any active session. */
|
|
264
|
+
dispose(): void;
|
|
265
|
+
private resetMutableState;
|
|
266
|
+
private attachSessionListeners;
|
|
267
|
+
private handleStatusChange;
|
|
268
|
+
private handleTranscripts;
|
|
269
|
+
private handleDataMessage;
|
|
270
|
+
private startEmotionPolling;
|
|
271
|
+
private stopEmotionPolling;
|
|
272
|
+
private setStatus;
|
|
273
|
+
private surfaceEmotionWarnings;
|
|
274
|
+
}
|
|
275
|
+
|
|
276
|
+
// NOTE(review): single-letter aliases look bundler-generated (shared chunk exports);
// the public names appear restored by the re-export in dist/index.d.ts — do not hand-edit.
export { type AudioConfig as A, CallStatus as C, type DataConnectionConfig as D, type JoinUrlResponse as J, Speaker as S, type Transcript as T, type VoiceCallOptions as V, type DataMessageFlags as a, VoiceCall as b, type VoiceCallEvents as c, type VoiceCallListener as d };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { V as VoiceCallOptions, J as JoinUrlResponse } from './VoiceCall-_BBARIQT.js';
|
|
2
|
+
export { A as AudioConfig, C as CallStatus, D as DataConnectionConfig, a as DataMessageFlags, S as Speaker, T as Transcript, b as VoiceCall, c as VoiceCallEvents, d as VoiceCallListener } from './VoiceCall-_BBARIQT.js';
|
|
3
|
+
import 'ultravox-client';
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* POSTs to the join-url endpoint and returns the parsed response.
|
|
7
|
+
*
|
|
8
|
+
* Auth resolution order:
|
|
9
|
+
* 1. `fetchJoinUrl` — fully custom fetcher, takes precedence over everything.
|
|
10
|
+
* 2. `publicKey` — browser-direct auth via `X-Public-Key` header + browser Origin.
|
|
11
|
+
* Hits `/v1/voice/joinurl` (the new SDK-aware endpoint).
|
|
12
|
+
* 3. `authToken` — Bearer JWT, used with the legacy `/ultravox/secure/joinurl` endpoint
|
|
13
|
+
* (or a custom `joinUrlPath`). Suitable for server-side / Netlify fn use.
|
|
14
|
+
*
|
|
15
|
+
* Throws on non-2xx or missing joinUrl.
|
|
16
|
+
*/
|
|
17
|
+
declare function fetchJoinUrl(opts: VoiceCallOptions): Promise<JoinUrlResponse>;
|
|
18
|
+
|
|
19
|
+
// Public entry-point surface: join-url option/response types plus the fetch helper.
export { JoinUrlResponse, VoiceCallOptions, fetchJoinUrl };
|