@oshara/voice-sdk 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +198 -0
- package/dist/appearance-CNWT8x1G.cjs +2 -0
- package/dist/appearance-CNWT8x1G.cjs.map +1 -0
- package/dist/appearance-i6QBkpCk.js +650 -0
- package/dist/appearance-i6QBkpCk.js.map +1 -0
- package/dist/consent-CK9VXNPa.js +54 -0
- package/dist/consent-CK9VXNPa.js.map +1 -0
- package/dist/consent-D7QNSkQD.cjs +2 -0
- package/dist/consent-D7QNSkQD.cjs.map +1 -0
- package/dist/core/analytics.d.ts +30 -0
- package/dist/core/appearance.d.ts +113 -0
- package/dist/core/audioSettings.d.ts +69 -0
- package/dist/core/consent.d.ts +17 -0
- package/dist/core/createVoiceAgent.d.ts +79 -0
- package/dist/core/events.d.ts +103 -0
- package/dist/core/formController.d.ts +28 -0
- package/dist/core/forms.d.ts +235 -0
- package/dist/core/index.d.ts +29 -0
- package/dist/core/prevContext.d.ts +26 -0
- package/dist/core/transport.d.ts +30 -0
- package/dist/core/types.d.ts +49 -0
- package/dist/core/voice.d.ts +79 -0
- package/dist/createVoiceAgent-BM3HODS6.js +1058 -0
- package/dist/createVoiceAgent-BM3HODS6.js.map +1 -0
- package/dist/createVoiceAgent-CJWxWzz6.cjs +4 -0
- package/dist/createVoiceAgent-CJWxWzz6.cjs.map +1 -0
- package/dist/index.cjs +2 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.js +44 -0
- package/dist/index.js.map +1 -0
- package/dist/react/index.d.ts +60 -0
- package/dist/react.cjs +2 -0
- package/dist/react.cjs.map +1 -0
- package/dist/react.js +115 -0
- package/dist/react.js.map +1 -0
- package/dist/styles.css +1838 -0
- package/dist/ui/index.d.ts +21 -0
- package/dist/ui/ui.d.ts +165 -0
- package/dist/ui.cjs +284 -0
- package/dist/ui.cjs.map +1 -0
- package/dist/ui.js +1153 -0
- package/dist/ui.js.map +1 -0
- package/package.json +67 -0
- package/src/core/analytics.ts +111 -0
- package/src/core/appearance.ts +464 -0
- package/src/core/audioSettings.ts +180 -0
- package/src/core/consent.ts +78 -0
- package/src/core/createVoiceAgent.ts +280 -0
- package/src/core/events.ts +120 -0
- package/src/core/formController.ts +317 -0
- package/src/core/forms.ts +861 -0
- package/src/core/index.ts +121 -0
- package/src/core/prevContext.ts +153 -0
- package/src/core/transport.ts +118 -0
- package/src/core/types.ts +66 -0
- package/src/core/voice.ts +1179 -0
- package/src/react/index.ts +238 -0
- package/src/ui/index.ts +507 -0
- package/src/ui/styles.css +1838 -0
- package/src/ui/ui.ts +1672 -0
- package/src/vite-env.d.ts +10 -0
|
@@ -0,0 +1,464 @@
|
|
|
1
|
+
import {
|
|
2
|
+
DEFAULT_FORM_DEFINITIONS,
|
|
3
|
+
FormDefinition,
|
|
4
|
+
normalizeFormDefinitions,
|
|
5
|
+
} from "./forms";
|
|
6
|
+
|
|
7
|
+
/**
|
|
8
|
+
* Per-agent widget appearance configuration.
|
|
9
|
+
*
|
|
10
|
+
* The widget fetches this from `GET {apiUrl}/api/agents/{slug}/appearance/`
|
|
11
|
+
* (override with `data-appearance-url`) on init, falling back to the defaults
|
|
12
|
+
* below if the request fails or any field is missing.
|
|
13
|
+
*
|
|
14
|
+
* Every field is optional on the wire — partial responses are merged on top of
|
|
15
|
+
* the defaults so the widget never breaks if the backend hasn't been updated.
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
/** Corner identifiers accepted for `AppearanceLayout.position`. */
export type WidgetPosition = "bottom-right" | "bottom-left" | "top-right" | "top-left";
|
|
23
|
+
|
|
24
|
+
/**
 * Colour settings of the widget.
 *
 * Values are stored as strings; the built-in defaults are hex colours
 * (e.g. "#6366F1"). `mergeAppearance` keeps any non-empty string it receives
 * and does not validate the colour syntax.
 */
export interface AppearanceTheme {
  primary_color: string;
  accent_color: string;
  background_color: string;
  text_color: string;
  // Bubble colours come in background/text pairs, one pair per speaker.
  user_bubble_color: string;
  user_bubble_text_color: string;
  agent_bubble_color: string;
  agent_bubble_text_color: string;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Numeric sizes of the widget parts.
 *
 * On the wire `mergeAppearance` accepts either a positive number or a
 * `"<n>px"` string for each field and always stores the plain number.
 */
export interface AppearanceDimensions {
  fab_size: number;
  panel_width: number;
  panel_height: number;
  border_radius: number;
}
|
|
41
|
+
|
|
42
|
+
/** Placement and typography settings. */
export interface AppearanceLayout {
  /** One of the four corner identifiers; anything else falls back to the default. */
  position: WidgetPosition;
  /** Font-family list (the default is a CSS-style comma-separated stack). */
  font_family: string;
}
|
|
46
|
+
|
|
47
|
+
/** User-visible status and helper strings, keyed by the state they describe. */
export interface AppearanceLabels {
  idle: string;
  connecting: string;
  listening: string;
  speaking: string;
  /**
   * Displayed while the agent is processing (STT → LLM → tool → TTS).
   * The backend can replace it with a contextual, per-tool label such as
   * "Searching the knowledge base…" through voice.agent_status events.
   */
  thinking: string;
  muted: string;
  call_ended: string;
  transcript_placeholder: string;
  language_label: string;
}
|
|
61
|
+
|
|
62
|
+
/** One selectable conversation language. */
export interface AppearanceLanguage {
  /** Short, BCP-47-like code that is sent to the backend, e.g. "en" or "ne". */
  code: string;
  /** Name of the language in English, e.g. "English", "Nepali". */
  label: string;
  /** Name in the language's own script, e.g. "नेपाली"; defaults to `label`. */
  native_label: string;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Fully resolved appearance of one agent's widget.
 *
 * This is the shape produced by `mergeAppearance`: every field is present,
 * either taken from the server response or filled in from
 * `DEFAULT_APPEARANCE`.
 */
export interface AppearanceConfig {
  name: string;
  subtitle: string;
  logo_url: string;
  /** FAB text; empty by default, in which case the FAB is icon-only. */
  fab_label: string;
  /** Secondary FAB text; empty by default. */
  fab_sublabel: string;
  start_button_text: string;
  powered_by_text: string;
  powered_by_url: string;
  theme: AppearanceTheme;
  dimensions: AppearanceDimensions;
  layout: AppearanceLayout;
  labels: AppearanceLabels;
  languages: AppearanceLanguage[];
  default_language: string;
  /** Maximum duration of one voice call in seconds; 0 means unlimited. */
  max_call_seconds: number;
  /**
   * Link to the terms & conditions / privacy policy page. A non-empty value
   * adds a consent line under the Start button saying that starting the
   * call accepts the terms; an empty value turns the consent UI off.
   */
  terms_url: string;
  /** Text of the terms link (default "Terms & Conditions"). */
  terms_label: string;
  /**
   * Text placed in front of the terms link (default
   * "By starting the call, you agree to our").
   */
  consent_text: string;
  /**
   * Additional form definitions (book-a-demo, reservation, lead capture, …)
   * that the agent can open during a call by publishing a LiveKit data
   * message. The built-in defaults are always included; an entry here
   * replaces the default with the same id. The wire protocol is described
   * in `forms.ts`.
   */
  forms: FormDefinition[];
  /**
   * Controls the in-widget audio settings drawer (mic/speaker selection,
   * NC/AEC/AGC toggles, headphone mode, mic-level meter, diagnostics row).
   * True by default; false removes the gear icon altogether for a
   * kiosk-style minimal UI.
   */
  show_audio_settings: boolean;
}
|
|
112
|
+
|
|
113
|
+
/** Default colours. */
const DEFAULT_THEME: AppearanceTheme = {
  primary_color: "#6366F1",
  accent_color: "#22D3EE",
  background_color: "#FFFFFF",
  text_color: "#0F172A",
  user_bubble_color: "#6366F1",
  user_bubble_text_color: "#FFFFFF",
  agent_bubble_color: "#F1F5F9",
  agent_bubble_text_color: "#0F172A",
};

/** Default sizes. */
const DEFAULT_DIMENSIONS: AppearanceDimensions = {
  fab_size: 64,
  panel_width: 380,
  panel_height: 620,
  border_radius: 24,
};

/** Default placement and font stack. */
const DEFAULT_LAYOUT: AppearanceLayout = {
  position: "bottom-right",
  font_family:
    "Inter, -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif",
};

/** Default status/helper strings. */
const DEFAULT_LABELS: AppearanceLabels = {
  idle: "Idle",
  connecting: "Connecting…",
  listening: "Listening",
  speaking: "Speaking…",
  thinking: "Thinking…",
  muted: "Microphone muted",
  call_ended: "Call ended",
  transcript_placeholder: "Your live transcript will appear here",
  language_label: "Language",
};

/** Languages offered when the server does not send a usable list. */
const DEFAULT_LANGUAGES: AppearanceLanguage[] = [
  { code: "en", label: "English", native_label: "English" },
  { code: "ne", label: "Nepali", native_label: "नेपाली" },
];

/**
 * Appearance used when the server response is missing, fails, or omits a
 * field. `mergeAppearance` reads every fallback value from this object.
 */
export const DEFAULT_APPEARANCE: AppearanceConfig = {
  name: "Voice Assistant",
  subtitle: "Tap below to start a voice conversation.",
  logo_url: "",
  // FAB text stays empty unless the backend sets fab_label/fab_sublabel
  // explicitly; without it the FAB is icon-only.
  fab_label: "",
  fab_sublabel: "",
  start_button_text: "Start Call",
  powered_by_text: "Powered by Oshara.ai",
  powered_by_url: "",
  theme: DEFAULT_THEME,
  dimensions: DEFAULT_DIMENSIONS,
  layout: DEFAULT_LAYOUT,
  labels: DEFAULT_LABELS,
  languages: DEFAULT_LANGUAGES,
  default_language: "en",
  max_call_seconds: 0,
  terms_url: "",
  terms_label: "Terms & Conditions",
  consent_text: "By starting the call, you agree to our",
  forms: DEFAULT_FORM_DEFINITIONS,
  show_audio_settings: true,
};
|
|
168
|
+
|
|
169
|
+
/** Allow-list used to validate `layout.position` values from the server. */
const POSITIONS: WidgetPosition[] = ["bottom-right", "bottom-left", "top-right", "top-left"];
|
|
175
|
+
|
|
176
|
+
/**
 * Deep-merge a partial server response on top of DEFAULT_APPEARANCE.
 *
 * Only known keys are read; unknown keys are ignored. Each field is checked
 * on its own and replaced by its default when it is missing or has the wrong
 * type. Two kinds of string handling are used, matching the field's role:
 *
 * - "required" strings are trimmed and fall back to the default when empty;
 * - the remaining strings are taken verbatim (an empty string is a valid
 *   way to blank the field) and only fall back when the value is not a
 *   string at all.
 *
 * @param partial Decoded appearance payload. `null`, `undefined`, arrays and
 *   primitives are treated like an empty payload.
 * @returns A newly built config. The top-level object, the nested
 *   theme/dimensions/layout/labels objects, the `languages` array and its
 *   entries are created per call, so mutating the result never changes
 *   `DEFAULT_APPEARANCE`. The `forms` array is whatever `mergeForms`
 *   returns; the form definitions inside it are shared, not copied.
 */
export function mergeAppearance(
  partial: Partial<AppearanceConfig> | null | undefined,
): AppearanceConfig {
  const defaults = DEFAULT_APPEARANCE;

  // asRecord maps anything that is not a non-array object to {}, so a
  // missing payload runs through the same code path as a partial one and
  // yields a fresh all-defaults object instead of the shared constant.
  const safe = asRecord(partial);
  const theme = asRecord(safe.theme);
  const dimensions = asRecord(safe.dimensions);
  const layout = asRecord(safe.layout);
  const labels = asRecord(safe.labels);

  const required = (value: unknown, fallback: string): string =>
    nonEmptyString(value) ?? fallback;
  const pixels = (value: unknown, fallback: number): number =>
    pixelOrNumber(value) ?? fallback;

  // normalizeLanguages may hand back the default array itself; copy the
  // entries so the result never aliases DEFAULT_APPEARANCE.languages.
  const languages = normalizeLanguages(safe.languages).map((language) => ({
    ...language,
  }));

  // The default language has to be one of the offered languages. When the
  // requested code is not in the list, use the first listed language.
  const requestedLanguage = required(
    safe.default_language,
    defaults.default_language,
  );
  const defaultLanguage = languages.some(
    (language) => language.code === requestedLanguage,
  )
    ? requestedLanguage
    : (languages[0]?.code ?? requestedLanguage);

  return {
    name: required(safe.name, defaults.name),
    subtitle: stringOrDefault(safe.subtitle, defaults.subtitle),
    logo_url: stringOrDefault(safe.logo_url, defaults.logo_url),
    fab_label: stringOrDefault(safe.fab_label, defaults.fab_label),
    fab_sublabel: stringOrDefault(safe.fab_sublabel, defaults.fab_sublabel),
    start_button_text: required(
      safe.start_button_text,
      defaults.start_button_text,
    ),
    powered_by_text: stringOrDefault(
      safe.powered_by_text,
      defaults.powered_by_text,
    ),
    powered_by_url: stringOrDefault(
      safe.powered_by_url,
      defaults.powered_by_url,
    ),
    theme: {
      primary_color: required(theme.primary_color, defaults.theme.primary_color),
      accent_color: required(theme.accent_color, defaults.theme.accent_color),
      background_color: required(
        theme.background_color,
        defaults.theme.background_color,
      ),
      text_color: required(theme.text_color, defaults.theme.text_color),
      user_bubble_color: required(
        theme.user_bubble_color,
        defaults.theme.user_bubble_color,
      ),
      user_bubble_text_color: required(
        theme.user_bubble_text_color,
        defaults.theme.user_bubble_text_color,
      ),
      agent_bubble_color: required(
        theme.agent_bubble_color,
        defaults.theme.agent_bubble_color,
      ),
      agent_bubble_text_color: required(
        theme.agent_bubble_text_color,
        defaults.theme.agent_bubble_text_color,
      ),
    },
    dimensions: {
      fab_size: pixels(dimensions.fab_size, defaults.dimensions.fab_size),
      panel_width: pixels(
        dimensions.panel_width,
        defaults.dimensions.panel_width,
      ),
      panel_height: pixels(
        dimensions.panel_height,
        defaults.dimensions.panel_height,
      ),
      border_radius: pixels(
        dimensions.border_radius,
        defaults.dimensions.border_radius,
      ),
    },
    layout: {
      position:
        enumOrUndefined(POSITIONS, layout.position) ?? defaults.layout.position,
      font_family: stringOrDefault(
        layout.font_family,
        defaults.layout.font_family,
      ),
    },
    labels: {
      idle: required(labels.idle, defaults.labels.idle),
      connecting: required(labels.connecting, defaults.labels.connecting),
      listening: required(labels.listening, defaults.labels.listening),
      speaking: required(labels.speaking, defaults.labels.speaking),
      thinking: required(labels.thinking, defaults.labels.thinking),
      muted: required(labels.muted, defaults.labels.muted),
      call_ended: required(labels.call_ended, defaults.labels.call_ended),
      transcript_placeholder: stringOrDefault(
        labels.transcript_placeholder,
        defaults.labels.transcript_placeholder,
      ),
      language_label: required(
        labels.language_label,
        defaults.labels.language_label,
      ),
    },
    languages,
    default_language: defaultLanguage,
    max_call_seconds:
      nonNegativeNumber(safe.max_call_seconds) ?? defaults.max_call_seconds,
    terms_url: stringOrDefault(safe.terms_url, defaults.terms_url),
    terms_label: required(safe.terms_label, defaults.terms_label),
    consent_text: stringOrDefault(safe.consent_text, defaults.consent_text),
    forms: mergeForms(safe.forms),
    show_audio_settings:
      typeof safe.show_audio_settings === "boolean"
        ? safe.show_audio_settings
        : defaults.show_audio_settings,
  };
}
|
|
302
|
+
|
|
303
|
+
/**
 * Combine the built-in form definitions with the ones sent by the server.
 *
 * Entries are keyed by `id`. A server entry with the id of a built-in form
 * replaces it in place (the built-in's position is kept); server entries
 * with new ids are appended after the built-ins.
 *
 * @param value Raw `forms` value from the payload; it is passed through
 *   `normalizeFormDefinitions` before use.
 * @returns A new array of the merged definitions.
 */
function mergeForms(value: unknown): FormDefinition[] {
  const overrides = normalizeFormDefinitions(value);
  const byId = new Map<string, FormDefinition>(
    [...DEFAULT_FORM_DEFINITIONS, ...overrides].map(
      (form): [string, FormDefinition] => [form.id, form],
    ),
  );
  return [...byId.values()];
}
|
|
310
|
+
|
|
311
|
+
/**
 * Turn the raw `languages` payload into a clean, de-duplicated list.
 *
 * Entries that are not objects or have no non-empty `code` are dropped, and
 * only the first entry for each code is kept. A missing `label` becomes the
 * upper-cased code; a missing `native_label` becomes the label.
 *
 * @param value Raw `languages` value from the payload.
 * @returns The cleaned list, or `DEFAULT_APPEARANCE.languages` itself when
 *   the input is not an array or no entry survives.
 */
function normalizeLanguages(value: unknown): AppearanceLanguage[] {
  if (!Array.isArray(value)) return DEFAULT_APPEARANCE.languages;

  // A Map keeps first-seen order and gives the duplicate check for free.
  const byCode = new Map<string, AppearanceLanguage>();
  value.forEach((entry: unknown) => {
    if (!isPlainObject(entry)) return;
    const code = nonEmptyString(entry.code);
    if (code === undefined || byCode.has(code)) return;
    const label = nonEmptyString(entry.label) ?? code.toUpperCase();
    const native_label = nonEmptyString(entry.native_label) ?? label;
    byCode.set(code, { code, label, native_label });
  });

  return byCode.size > 0 ? [...byCode.values()] : DEFAULT_APPEARANCE.languages;
}
|
|
326
|
+
|
|
327
|
+
/** Inputs of `fetchAppearance`. */
export interface FetchAppearanceArgs {
  /** Ready-to-use (already substituted) URL; wins over `apiUrl`/`slug` when non-empty. */
  appearanceUrl: string;
  /** Base URL for the fallback `${apiUrl}/api/agents/${slug}/appearance/`. */
  apiUrl: string;
  /** Agent slug inserted into the fallback URL. */
  slug: string;
  /** Secret key, sent in the `x-api-key` header (server/trusted mode). */
  apiKey?: string;
  /** Replacement for `globalThis.fetch`, for Node <18 or tests. */
  fetch?: typeof fetch;
}
|
|
338
|
+
|
|
339
|
+
/**
 * Load the appearance configuration of an agent.
 *
 * The request goes to `args.appearanceUrl` when that is a non-blank string,
 * otherwise to `${apiUrl}/api/agents/${slug}/appearance/`. The JSON response
 * is unwrapped with `unwrapAppearanceResponse` and merged over the defaults
 * with `mergeAppearance`.
 *
 * This function does not reject for request problems: when no `fetch`
 * implementation is available, the response status is not OK, or the
 * request / JSON decoding throws, a warning is logged and the result of
 * `mergeAppearance(null)` (the defaults) is returned.
 *
 * @param args Endpoint, optional API key and optional custom `fetch`.
 * @returns The merged appearance configuration.
 */
export async function fetchAppearance(
  args: FetchAppearanceArgs,
): Promise<AppearanceConfig> {
  // A whitespace-only override must not be used as the request URL; treat it
  // like an absent one, the same way a blank apiKey is ignored below.
  const explicitUrl =
    typeof args.appearanceUrl === "string" ? args.appearanceUrl.trim() : "";
  // Strip every trailing slash so "https://host//" does not produce "//api".
  const url =
    explicitUrl ||
    `${args.apiUrl.replace(/\/+$/, "")}/api/agents/${encodeURIComponent(
      args.slug,
    )}/appearance/`;

  const headers: Record<string, string> = { Accept: "application/json" };
  const apiKey = args.apiKey?.trim();
  if (apiKey) headers["x-api-key"] = apiKey;

  const doFetch =
    args.fetch ??
    (typeof globalThis.fetch === "function"
      ? globalThis.fetch.bind(globalThis)
      : null);
  if (!doFetch) {
    // eslint-disable-next-line no-console
    console.warn("[voice-agent] no fetch available; using default appearance");
    return mergeAppearance(null);
  }

  // eslint-disable-next-line no-console
  console.info("[voice-agent] fetching appearance from", url);

  try {
    const response = await doFetch(url, { method: "GET", headers });
    if (!response.ok) {
      // eslint-disable-next-line no-console
      console.warn(
        `[voice-agent] appearance fetch returned ${response.status}; using defaults`,
      );
      return mergeAppearance(null);
    }
    // The payload is untrusted: keep it `unknown` and let the unwrap/merge
    // helpers validate it field by field.
    const body: unknown = await response.json();
    // eslint-disable-next-line no-console
    console.info("[voice-agent] appearance response", body);
    const merged = mergeAppearance(unwrapAppearanceResponse(body));
    // eslint-disable-next-line no-console
    console.info("[voice-agent] applied appearance", merged);
    return merged;
  } catch (err) {
    // eslint-disable-next-line no-console
    console.warn("[voice-agent] appearance fetch failed; using defaults:", err);
    return mergeAppearance(null);
  }
}
|
|
387
|
+
|
|
388
|
+
/**
 * Find the appearance object inside a response body.
 *
 * The payload may be wrapped under `data`, `appearance` or
 * `widget_appearance`; the first of these (in that order) that holds a
 * non-array object is used. Without such a wrapper the body itself is taken
 * as the payload.
 *
 * @param body Decoded JSON body.
 * @returns The payload object, or `null` when `body` is not a non-array object.
 */
function unwrapAppearanceResponse(
  body: unknown,
): Partial<AppearanceConfig> | null {
  if (!isPlainObject(body)) return null;

  const { data, appearance, widget_appearance } = body;
  const envelope = [data, appearance, widget_appearance].find(isPlainObject);
  return (envelope ?? body) as Partial<AppearanceConfig>;
}
|
|
407
|
+
|
|
408
|
+
/**
 * Type guard for "object that can be indexed by string keys".
 *
 * True for any non-null, non-array value whose `typeof` is "object"; the
 * prototype is not inspected, so class instances such as `Date` also pass.
 */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
|
|
411
|
+
|
|
412
|
+
/**
 * Return `value` when it is a non-array object, otherwise a new empty
 * object, so callers can read properties without further checks.
 */
function asRecord(value: unknown): Record<string, unknown> {
  if (isPlainObject(value)) return value;
  return {};
}
|
|
415
|
+
|
|
416
|
+
/**
 * Return the trimmed string, or `undefined` when `value` is not a string or
 * contains only whitespace.
 */
function nonEmptyString(value: unknown): string | undefined {
  const trimmed = typeof value === "string" ? value.trim() : "";
  return trimmed === "" ? undefined : trimmed;
}
|
|
421
|
+
|
|
422
|
+
/**
 * Return `value` untouched when it is a string (the empty string included),
 * otherwise `fallback`.
 */
function stringOrDefault(value: unknown, fallback: string): string {
  if (typeof value === "string") return value;
  return fallback;
}
|
|
425
|
+
|
|
426
|
+
/** Return `value` when it is a finite number greater than zero, else `undefined`. */
function finitePositiveNumber(value: unknown): number | undefined {
  return typeof value === "number" && Number.isFinite(value) && value > 0
    ? value
    : undefined;
}

/**
 * Read a finite number that is zero or greater.
 *
 * Accepts a number, or a string that `Number()` converts to such a number
 * after trimming. Blank strings and everything else give `undefined`.
 */
function nonNegativeNumber(value: unknown): number | undefined {
  if (typeof value === "number" && Number.isFinite(value) && value >= 0) {
    return value;
  }
  if (typeof value === "string") {
    const trimmed = value.trim();
    if (!trimmed) return undefined;
    const parsed = Number(trimmed);
    if (Number.isFinite(parsed) && parsed >= 0) return parsed;
  }
  return undefined;
}

/**
 * Read a positive size that may arrive as a number or as a string.
 *
 * Accepted forms: a finite number > 0, or a string holding an unsigned
 * decimal number with an optional, case-insensitive `px` suffix ("380",
 * "380px", "24.5PX"); surrounding whitespace is ignored. Other units,
 * signs, zero and non-finite values give `undefined` so the caller can fall
 * back to its default.
 */
function pixelOrNumber(value: unknown): number | undefined {
  const direct = finitePositiveNumber(value);
  if (direct !== undefined) return direct;
  if (typeof value !== "string") return undefined;

  // The suffix is optional so that a stringified number ("380") is treated
  // like the number itself, in line with nonNegativeNumber.
  const match = /^(\d+(?:\.\d+)?)(?:px)?$/i.exec(value.trim());
  if (!match) return undefined;

  return finitePositiveNumber(Number(match[1]));
}
|
|
456
|
+
|
|
457
|
+
/**
 * Return `value` typed as `T` when it is a string contained in `allowed`,
 * otherwise `undefined`. The comparison is exact (case-sensitive).
 */
function enumOrUndefined<T extends string>(
  allowed: readonly T[],
  value: unknown,
): T | undefined {
  if (typeof value !== "string") return undefined;
  return allowed.find((candidate) => candidate === value);
}
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Per-agent audio preferences for the voice widget.
|
|
3
|
+
*
|
|
4
|
+
* Persists the user's chosen mic/speaker devices, processor toggles, and
|
|
5
|
+
* speaker volume so subsequent calls open with the same setup. Reads &
|
|
6
|
+
* writes are namespaced by agent slug so different agents can hold
|
|
7
|
+
* different preferences in the same browser.
|
|
8
|
+
*/
|
|
9
|
+
|
|
10
|
+
/**
 * Deep-learning noise-suppression engine that runs in addition to the
 * browser's own noise suppression. Each user picks it in the audio
 * settings drawer.
 *
 * - `off`: no extra engine; only browser NS and voiceIsolation are used.
 * - `krisp`: @livekit/krisp-noise-filter (closed-source, free for LK).
 * - `deepfilter`: deepfilternet3-noise-filter
 *   (mezonai/mezon-noise-suppression), the open-source DeepFilterNet3,
 *   which has a strength setting from 0 to 100.
 */
export type NoiseFilterEngine = "off" | "krisp" | "deepfilter";
|
|
20
|
+
|
|
21
|
+
/** Audio preferences that are persisted per agent. */
export interface AudioPrefs {
  /** Browser echo cancellation (standard MediaTrackSettings.echoCancellation). */
  echoCancellation: boolean;
  /** Browser noise suppression (standard MediaTrackSettings). */
  noiseSuppression: boolean;
  /** Browser automatic gain control. */
  autoGainControl: boolean;
  /** Non-standard Microsoft / Chrome hardware voice isolation. */
  voiceIsolation: boolean;
  /** Deep-learning noise-suppression processor to attach. */
  noiseFilter: NoiseFilterEngine;
  /** DeepFilterNet3 strength, 0-100; only used when noiseFilter === "deepfilter". */
  deepFilterStrength: number;
  /** deviceId of the chosen microphone; "" selects the system default. */
  micDeviceId: string;
  /** deviceId of the chosen speaker, for setSinkId; "" selects the default. */
  speakerDeviceId: string;
  /** Speaker volume, 0-100; applied as HTMLAudioElement.volume / 100. */
  outputVolume: number;
  /**
   * True turns half-duplex ducking off: with headphones on, the microphone
   * cannot physically pick up the speaker, so the user does not have to be
   * muted while the agent is talking.
   */
  headphonesMode: boolean;
  /** Show the live transcription in the UI. */
  transcriptionEnabled: boolean;
  /** Show the text input box while a call is running. */
  textInputEnabled: boolean;
}
|
|
51
|
+
|
|
52
|
+
/** Preferences used when nothing (or nothing valid) has been stored yet. */
export const DEFAULT_AUDIO_PREFS: AudioPrefs = {
  echoCancellation: true,
  noiseSuppression: true,
  // AGC is disabled by default. It amplifies the fading end of an
  // utterance, which makes the agent's contextual turn detector believe the
  // user is still talking softly and postpones preemptive LLM generation.
  // Soft-spoken users whom the agent cannot hear can switch it on for a
  // call in the audio settings drawer.
  autoGainControl: false,
  voiceIsolation: true,
  noiseFilter: "deepfilter",
  deepFilterStrength: 40,
  micDeviceId: "",
  speakerDeviceId: "",
  outputVolume: 85,
  headphonesMode: true,
  transcriptionEnabled: false,
  textInputEnabled: false,
};
|
|
71
|
+
|
|
72
|
+
/** Prefix shared by all localStorage keys that hold audio preferences. */
const STORAGE_PREFIX = "voice-agent:audio-prefs:";

/**
 * Build the localStorage key for an agent.
 *
 * @param agentSlug Agent slug; an empty slug is stored under "default".
 */
function storageKey(agentSlug: string): string {
  const slug = agentSlug || "default";
  return STORAGE_PREFIX + slug;
}
|
|
77
|
+
|
|
78
|
+
/**
 * Read the stored audio preferences of an agent.
 *
 * Stored values are passed through `normalize` and laid over
 * `DEFAULT_AUDIO_PREFS`, so fields that are absent or invalid keep their
 * default. Any failure (no `window`, storage access denied, invalid JSON)
 * results in the plain defaults.
 *
 * @param agentSlug Agent whose preferences should be read.
 * @returns A new preferences object; it never aliases `DEFAULT_AUDIO_PREFS`.
 */
export function loadAudioPrefs(agentSlug: string): AudioPrefs {
  const prefs: AudioPrefs = { ...DEFAULT_AUDIO_PREFS };
  try {
    const serialized = window.localStorage.getItem(storageKey(agentSlug));
    const stored: unknown = serialized ? JSON.parse(serialized) : null;
    if (stored && typeof stored === "object") {
      Object.assign(prefs, normalize(stored as Record<string, unknown>));
    }
  } catch {
    // Unreadable storage or corrupt JSON: keep the defaults copied above.
  }
  return prefs;
}
|
|
91
|
+
|
|
92
|
+
/**
 * Store the audio preferences of an agent in localStorage.
 *
 * The preferences go through `normalize` first, so only known, valid fields
 * are written. Errors are swallowed on purpose.
 *
 * @param agentSlug Agent the preferences belong to.
 * @param prefs Preferences to store.
 */
export function saveAudioPrefs(agentSlug: string, prefs: AudioPrefs): void {
  try {
    const sanitized = normalize(prefs as unknown as Record<string, unknown>);
    window.localStorage.setItem(
      storageKey(agentSlug),
      JSON.stringify(sanitized),
    );
  } catch {
    // Storage can be unavailable (private mode, quota exceeded). The only
    // consequence is that the preferences are gone after a reload, which
    // is not worth reporting to the user.
  }
}
|
|
103
|
+
|
|
104
|
+
/**
 * Copy the valid audio-preference fields out of an untrusted record.
 *
 * A field is copied only when it has the expected type; the two percentage
 * fields are additionally clamped to 0-100. Unknown keys are dropped. The
 * legacy boolean `krispEnabled` is translated to `noiseFilter` when no valid
 * `noiseFilter` is present.
 *
 * @param input Parsed storage content or an `AudioPrefs` object.
 * @returns The valid fields only; may be empty.
 */
function normalize(input: Record<string, unknown>): Partial<AudioPrefs> {
  const out: Partial<AudioPrefs> = {};

  const copyFlag = (
    key:
      | "echoCancellation"
      | "noiseSuppression"
      | "autoGainControl"
      | "voiceIsolation"
      | "headphonesMode"
      | "transcriptionEnabled"
      | "textInputEnabled",
  ): void => {
    const candidate = input[key];
    if (typeof candidate === "boolean") out[key] = candidate;
  };
  const copyPercent = (key: "deepFilterStrength" | "outputVolume"): void => {
    const candidate = input[key];
    if (typeof candidate === "number" && Number.isFinite(candidate)) {
      out[key] = Math.min(100, Math.max(0, candidate));
    }
  };
  const copyDeviceId = (key: "micDeviceId" | "speakerDeviceId"): void => {
    const candidate = input[key];
    if (typeof candidate === "string") out[key] = candidate;
  };

  // The calls below follow the field order of AudioPrefs so the key order
  // of the result (and of its serialized form) stays stable.
  copyFlag("echoCancellation");
  copyFlag("noiseSuppression");
  copyFlag("autoGainControl");
  copyFlag("voiceIsolation");

  const engine = input.noiseFilter;
  switch (engine) {
    case "off":
    case "krisp":
    case "deepfilter":
      out.noiseFilter = engine;
      break;
    default:
      // Migration from the AudioPrefs shape that predates the engine picker.
      if (typeof input.krispEnabled === "boolean") {
        out.noiseFilter = input.krispEnabled ? "krisp" : "off";
      }
  }

  copyPercent("deepFilterStrength");
  copyDeviceId("micDeviceId");
  copyDeviceId("speakerDeviceId");
  copyPercent("outputVolume");
  copyFlag("headphonesMode");
  copyFlag("transcriptionEnabled");
  copyFlag("textInputEnabled");

  return out;
}
|
|
129
|
+
|
|
130
|
+
/** Audio devices returned by `enumerateAudioDevices`, split by direction. */
export interface AudioDevices {
  /** Devices whose kind is "audioinput". */
  inputs: MediaDeviceInfo[];
  /** Devices whose kind is "audiooutput". */
  outputs: MediaDeviceInfo[];
}
|
|
134
|
+
|
|
135
|
+
/**
 * Enumerate audio I/O devices. Device labels are only populated after the
 * user has granted mic permission for the page; before that the labels
 * come back empty and the caller should show a placeholder.
 *
 * When `navigator`, `navigator.mediaDevices` or `enumerateDevices` is not
 * available (older browsers, non-browser runtimes such as SSR), both lists
 * are empty instead of an exception being thrown.
 *
 * @returns The audio input and output devices; other device kinds are
 *   left out. A rejection of `enumerateDevices()` itself is passed on to
 *   the caller.
 */
export async function enumerateAudioDevices(): Promise<AudioDevices> {
  // `typeof` is the only safe probe for an identifier that may not be
  // declared at all in the current runtime.
  const mediaDevices =
    typeof navigator === "undefined" ? undefined : navigator.mediaDevices;
  if (!mediaDevices || typeof mediaDevices.enumerateDevices !== "function") {
    return { inputs: [], outputs: [] };
  }

  const devices = await mediaDevices.enumerateDevices();
  return {
    inputs: devices.filter((device) => device.kind === "audioinput"),
    outputs: devices.filter((device) => device.kind === "audiooutput"),
  };
}
|
|
149
|
+
|
|
150
|
+
/** Results of the browser-feature checks that decide which UI controls are shown. */
export interface AudioCapabilities {
  /** False where setSinkId is missing (Safari and Firefox at the time of writing). */
  setSinkIdSupported: boolean;
  /** voiceIsolation only takes effect on Chromium/Edge when the system supports it. */
  voiceIsolationSupported: boolean;
}
|
|
157
|
+
|
|
158
|
+
/**
 * Check which optional audio features the current runtime offers.
 *
 * Neither check throws: a missing `HTMLAudioElement`, `navigator` or
 * `mediaDevices` simply reports the feature as unsupported.
 *
 * @returns `setSinkIdSupported` is true when `HTMLAudioElement.prototype`
 *   has a `setSinkId` function; `voiceIsolationSupported` is true when
 *   `getSupportedConstraints()` lists `voiceIsolation`.
 */
export function probeAudioCapabilities(): AudioCapabilities {
  let setSinkIdSupported = false;
  if (typeof HTMLAudioElement !== "undefined") {
    const proto = HTMLAudioElement.prototype as unknown as {
      setSinkId?: unknown;
    };
    setSinkIdSupported =
      Boolean(proto) && typeof proto.setSinkId === "function";
  }

  // voiceIsolation has no dependable feature flag. Real support only shows
  // up in MediaTrackSettings.getSettings() after the track is published, so
  // getSupportedConstraints() is the best check that can be done up front.
  const voiceIsolationSupported = ((): boolean => {
    try {
      const constraints =
        navigator.mediaDevices?.getSupportedConstraints?.() as
          | (MediaTrackSupportedConstraints & { voiceIsolation?: boolean })
          | undefined;
      return Boolean(constraints?.voiceIsolation);
    } catch {
      return false;
    }
  })();

  return { setSinkIdSupported, voiceIsolationSupported };
}
|