@oshara/voice-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +198 -0
  2. package/dist/appearance-CNWT8x1G.cjs +2 -0
  3. package/dist/appearance-CNWT8x1G.cjs.map +1 -0
  4. package/dist/appearance-i6QBkpCk.js +650 -0
  5. package/dist/appearance-i6QBkpCk.js.map +1 -0
  6. package/dist/consent-CK9VXNPa.js +54 -0
  7. package/dist/consent-CK9VXNPa.js.map +1 -0
  8. package/dist/consent-D7QNSkQD.cjs +2 -0
  9. package/dist/consent-D7QNSkQD.cjs.map +1 -0
  10. package/dist/core/analytics.d.ts +30 -0
  11. package/dist/core/appearance.d.ts +113 -0
  12. package/dist/core/audioSettings.d.ts +69 -0
  13. package/dist/core/consent.d.ts +17 -0
  14. package/dist/core/createVoiceAgent.d.ts +79 -0
  15. package/dist/core/events.d.ts +103 -0
  16. package/dist/core/formController.d.ts +28 -0
  17. package/dist/core/forms.d.ts +235 -0
  18. package/dist/core/index.d.ts +29 -0
  19. package/dist/core/prevContext.d.ts +26 -0
  20. package/dist/core/transport.d.ts +30 -0
  21. package/dist/core/types.d.ts +49 -0
  22. package/dist/core/voice.d.ts +79 -0
  23. package/dist/createVoiceAgent-BM3HODS6.js +1058 -0
  24. package/dist/createVoiceAgent-BM3HODS6.js.map +1 -0
  25. package/dist/createVoiceAgent-CJWxWzz6.cjs +4 -0
  26. package/dist/createVoiceAgent-CJWxWzz6.cjs.map +1 -0
  27. package/dist/index.cjs +2 -0
  28. package/dist/index.cjs.map +1 -0
  29. package/dist/index.js +44 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/react/index.d.ts +60 -0
  32. package/dist/react.cjs +2 -0
  33. package/dist/react.cjs.map +1 -0
  34. package/dist/react.js +115 -0
  35. package/dist/react.js.map +1 -0
  36. package/dist/styles.css +1838 -0
  37. package/dist/ui/index.d.ts +21 -0
  38. package/dist/ui/ui.d.ts +165 -0
  39. package/dist/ui.cjs +284 -0
  40. package/dist/ui.cjs.map +1 -0
  41. package/dist/ui.js +1153 -0
  42. package/dist/ui.js.map +1 -0
  43. package/package.json +67 -0
  44. package/src/core/analytics.ts +111 -0
  45. package/src/core/appearance.ts +464 -0
  46. package/src/core/audioSettings.ts +180 -0
  47. package/src/core/consent.ts +78 -0
  48. package/src/core/createVoiceAgent.ts +280 -0
  49. package/src/core/events.ts +120 -0
  50. package/src/core/formController.ts +317 -0
  51. package/src/core/forms.ts +861 -0
  52. package/src/core/index.ts +121 -0
  53. package/src/core/prevContext.ts +153 -0
  54. package/src/core/transport.ts +118 -0
  55. package/src/core/types.ts +66 -0
  56. package/src/core/voice.ts +1179 -0
  57. package/src/react/index.ts +238 -0
  58. package/src/ui/index.ts +507 -0
  59. package/src/ui/styles.css +1838 -0
  60. package/src/ui/ui.ts +1672 -0
  61. package/src/vite-env.d.ts +10 -0
@@ -0,0 +1,464 @@
1
+ import {
2
+ DEFAULT_FORM_DEFINITIONS,
3
+ FormDefinition,
4
+ normalizeFormDefinitions,
5
+ } from "./forms";
6
+
7
+ /**
8
+ * Per-agent widget appearance configuration.
9
+ *
10
+ * The widget fetches this from `GET {apiUrl}/api/agents/{slug}/appearance/`
11
+ * (override with `data-appearance-url`) on init, falling back to the defaults
12
+ * below if the request fails or any field is missing.
13
+ *
14
+ * Every field is optional on the wire — partial responses are merged on top of
15
+ * the defaults so the widget never breaks if the backend hasn't been updated.
16
+ */
17
+
18
/** One of the four corner identifiers accepted for the widget's layout position. */
export type WidgetPosition =
  | "bottom-right"
  | "bottom-left"
  | "top-right"
  | "top-left";
23
+
24
/**
 * Color strings that make up the widget theme.
 *
 * The built-in defaults are hex values (e.g. "#6366F1"); when merging a
 * server response, any non-empty string is accepted as-is for each field.
 */
export interface AppearanceTheme {
  primary_color: string;
  accent_color: string;
  background_color: string;
  text_color: string;
  // Bubble colors — presumably the user / agent transcript bubbles; confirm
  // against the UI layer.
  user_bubble_color: string;
  user_bubble_text_color: string;
  agent_bubble_color: string;
  agent_bubble_text_color: string;
}
34
+
35
/**
 * Numeric sizes for the widget chrome.
 *
 * Values are plain numbers (presumably CSS pixels — the merge step also
 * accepts strings such as "24px" and converts them to numbers).
 */
export interface AppearanceDimensions {
  fab_size: number;
  panel_width: number;
  panel_height: number;
  border_radius: number;
}
41
+
42
/** Placement and typography settings for the widget. */
export interface AppearanceLayout {
  /** Which corner the widget is positioned in. */
  position: WidgetPosition;
  /** Font-family string (the default is a CSS-style comma-separated stack). */
  font_family: string;
}
46
+
47
/** User-visible status and UI strings, one per widget state or control. */
export interface AppearanceLabels {
  idle: string;
  connecting: string;
  listening: string;
  speaking: string;
  /**
   * Shown while the agent is processing (running STT→LLM→tool→TTS). The
   * backend may replace it with a per-tool contextual label
   * ("Searching the knowledge base…") via voice.agent_status events.
   */
  thinking: string;
  muted: string;
  call_ended: string;
  transcript_placeholder: string;
  language_label: string;
}
61
+
62
/** One selectable conversation language. */
export interface AppearanceLanguage {
  /** Short BCP-47-style code sent to the backend, e.g. "en" or "ne". */
  code: string;
  /** Label in English, e.g. "English", "Nepali". */
  label: string;
  /** Label in the language's own script, e.g. "नेपाली"; falls back to `label`. */
  native_label: string;
}
70
+
71
/**
 * Fully-resolved appearance configuration for one agent's widget.
 *
 * Every field is required here; partial server responses are completed with
 * defaults before they take this shape.
 */
export interface AppearanceConfig {
  name: string;
  subtitle: string;
  logo_url: string;
  fab_label: string;
  fab_sublabel: string;
  start_button_text: string;
  powered_by_text: string;
  powered_by_url: string;
  theme: AppearanceTheme;
  dimensions: AppearanceDimensions;
  layout: AppearanceLayout;
  labels: AppearanceLabels;
  languages: AppearanceLanguage[];
  default_language: string;
  /** Maximum duration of a single voice call in seconds; 0 means unlimited. */
  max_call_seconds: number;
  /**
   * Link to the terms & conditions / privacy policy page. When non-empty, a
   * consent line appears under the Start button saying that starting the
   * call accepts these terms. An empty string turns the consent UI off.
   */
  terms_url: string;
  /** Visible text of the terms link (default "Terms & Conditions"). */
  terms_label: string;
  /**
   * Text placed before the terms link
   * (default "By starting the call, you agree to our").
   */
  consent_text: string;
  /**
   * Additional form definitions (book-a-demo, reservation, lead capture, …)
   * that the agent can open mid-call by publishing a LiveKit data message.
   * The built-in defaults are always present; an entry here replaces the
   * default with the same id. The wire protocol is described in `forms.ts`.
   */
  forms: FormDefinition[];
  /**
   * Controls the in-widget audio settings drawer (mic/speaker selection,
   * NC/AEC/AGC toggles, headphone mode, mic-level meter, diagnostics row).
   * True by default; false hides the gear icon completely for a kiosk-style
   * minimal UI.
   */
  show_audio_settings: boolean;
}
112
+
113
/**
 * Baseline appearance used when the backend supplies nothing (or an invalid
 * value) for a field.
 */
export const DEFAULT_APPEARANCE: AppearanceConfig = {
  name: "Voice Assistant",
  subtitle: "Tap below to start a voice conversation.",
  logo_url: "",
  // FAB text stays empty unless the backend explicitly sets
  // fab_label / fab_sublabel, which leaves the FAB icon-only by default.
  fab_label: "",
  fab_sublabel: "",
  start_button_text: "Start Call",
  powered_by_text: "Powered by Oshara.ai",
  powered_by_url: "",

  theme: {
    primary_color: "#6366F1",
    accent_color: "#22D3EE",
    background_color: "#FFFFFF",
    text_color: "#0F172A",
    user_bubble_color: "#6366F1",
    user_bubble_text_color: "#FFFFFF",
    agent_bubble_color: "#F1F5F9",
    agent_bubble_text_color: "#0F172A",
  },

  dimensions: {
    fab_size: 64,
    panel_width: 380,
    panel_height: 620,
    border_radius: 24,
  },

  layout: {
    position: "bottom-right",
    font_family:
      "Inter, -apple-system, BlinkMacSystemFont, 'Segoe UI', sans-serif",
  },

  labels: {
    idle: "Idle",
    connecting: "Connecting…",
    listening: "Listening",
    speaking: "Speaking…",
    thinking: "Thinking…",
    muted: "Microphone muted",
    call_ended: "Call ended",
    transcript_placeholder: "Your live transcript will appear here",
    language_label: "Language",
  },

  languages: [
    { code: "en", label: "English", native_label: "English" },
    { code: "ne", label: "Nepali", native_label: "नेपाली" },
  ],
  default_language: "en",

  // 0 = no call-length cap.
  max_call_seconds: 0,

  // An empty terms_url disables the consent line.
  terms_url: "",
  terms_label: "Terms & Conditions",
  consent_text: "By starting the call, you agree to our",

  forms: DEFAULT_FORM_DEFINITIONS,
  show_audio_settings: true,
};
168
+
169
/** Allow-list used to validate `layout.position` values coming off the wire. */
const POSITIONS: WidgetPosition[] = [
  "bottom-right",
  "bottom-left",
  "top-right",
  "top-left",
];
175
+
176
+ /**
177
+ * Deep-merge a partial server response on top of DEFAULT_APPEARANCE.
178
+ * Only known keys are accepted; unknown keys are ignored.
179
+ */
180
+ export function mergeAppearance(
181
+ partial: Partial<AppearanceConfig> | null | undefined,
182
+ ): AppearanceConfig {
183
+ if (!partial || typeof partial !== "object") return DEFAULT_APPEARANCE;
184
+ const safe = asRecord(partial);
185
+ const theme = asRecord(safe.theme);
186
+ const dimensions = asRecord(safe.dimensions);
187
+ const layout = asRecord(safe.layout);
188
+ const labels = asRecord(safe.labels);
189
+
190
+ return {
191
+ name: nonEmptyString(safe.name) ?? DEFAULT_APPEARANCE.name,
192
+ subtitle: stringOrDefault(safe.subtitle, DEFAULT_APPEARANCE.subtitle),
193
+ logo_url: stringOrDefault(safe.logo_url, DEFAULT_APPEARANCE.logo_url),
194
+ fab_label: stringOrDefault(safe.fab_label, DEFAULT_APPEARANCE.fab_label),
195
+ fab_sublabel: stringOrDefault(
196
+ safe.fab_sublabel,
197
+ DEFAULT_APPEARANCE.fab_sublabel,
198
+ ),
199
+ start_button_text:
200
+ nonEmptyString(safe.start_button_text) ??
201
+ DEFAULT_APPEARANCE.start_button_text,
202
+ powered_by_text: stringOrDefault(
203
+ safe.powered_by_text,
204
+ DEFAULT_APPEARANCE.powered_by_text,
205
+ ),
206
+ powered_by_url: stringOrDefault(
207
+ safe.powered_by_url,
208
+ DEFAULT_APPEARANCE.powered_by_url,
209
+ ),
210
+ theme: {
211
+ primary_color:
212
+ nonEmptyString(theme.primary_color) ??
213
+ DEFAULT_APPEARANCE.theme.primary_color,
214
+ accent_color:
215
+ nonEmptyString(theme.accent_color) ??
216
+ DEFAULT_APPEARANCE.theme.accent_color,
217
+ background_color:
218
+ nonEmptyString(theme.background_color) ??
219
+ DEFAULT_APPEARANCE.theme.background_color,
220
+ text_color:
221
+ nonEmptyString(theme.text_color) ?? DEFAULT_APPEARANCE.theme.text_color,
222
+ user_bubble_color:
223
+ nonEmptyString(theme.user_bubble_color) ??
224
+ DEFAULT_APPEARANCE.theme.user_bubble_color,
225
+ user_bubble_text_color:
226
+ nonEmptyString(theme.user_bubble_text_color) ??
227
+ DEFAULT_APPEARANCE.theme.user_bubble_text_color,
228
+ agent_bubble_color:
229
+ nonEmptyString(theme.agent_bubble_color) ??
230
+ DEFAULT_APPEARANCE.theme.agent_bubble_color,
231
+ agent_bubble_text_color:
232
+ nonEmptyString(theme.agent_bubble_text_color) ??
233
+ DEFAULT_APPEARANCE.theme.agent_bubble_text_color,
234
+ },
235
+ dimensions: {
236
+ fab_size:
237
+ pixelOrNumber(dimensions.fab_size) ??
238
+ DEFAULT_APPEARANCE.dimensions.fab_size,
239
+ panel_width:
240
+ pixelOrNumber(dimensions.panel_width) ??
241
+ DEFAULT_APPEARANCE.dimensions.panel_width,
242
+ panel_height:
243
+ pixelOrNumber(dimensions.panel_height) ??
244
+ DEFAULT_APPEARANCE.dimensions.panel_height,
245
+ border_radius:
246
+ pixelOrNumber(dimensions.border_radius) ??
247
+ DEFAULT_APPEARANCE.dimensions.border_radius,
248
+ },
249
+ layout: {
250
+ position:
251
+ enumOrUndefined(POSITIONS, layout.position) ??
252
+ DEFAULT_APPEARANCE.layout.position,
253
+ font_family: stringOrDefault(
254
+ layout.font_family,
255
+ DEFAULT_APPEARANCE.layout.font_family,
256
+ ),
257
+ },
258
+ labels: {
259
+ idle: nonEmptyString(labels.idle) ?? DEFAULT_APPEARANCE.labels.idle,
260
+ connecting:
261
+ nonEmptyString(labels.connecting) ??
262
+ DEFAULT_APPEARANCE.labels.connecting,
263
+ listening:
264
+ nonEmptyString(labels.listening) ?? DEFAULT_APPEARANCE.labels.listening,
265
+ speaking:
266
+ nonEmptyString(labels.speaking) ?? DEFAULT_APPEARANCE.labels.speaking,
267
+ thinking:
268
+ nonEmptyString(labels.thinking) ?? DEFAULT_APPEARANCE.labels.thinking,
269
+ muted: nonEmptyString(labels.muted) ?? DEFAULT_APPEARANCE.labels.muted,
270
+ call_ended:
271
+ nonEmptyString(labels.call_ended) ??
272
+ DEFAULT_APPEARANCE.labels.call_ended,
273
+ transcript_placeholder: stringOrDefault(
274
+ labels.transcript_placeholder,
275
+ DEFAULT_APPEARANCE.labels.transcript_placeholder,
276
+ ),
277
+ language_label:
278
+ nonEmptyString(labels.language_label) ??
279
+ DEFAULT_APPEARANCE.labels.language_label,
280
+ },
281
+ languages: normalizeLanguages(safe.languages),
282
+ default_language:
283
+ nonEmptyString(safe.default_language) ??
284
+ DEFAULT_APPEARANCE.default_language,
285
+ max_call_seconds:
286
+ nonNegativeNumber(safe.max_call_seconds) ??
287
+ DEFAULT_APPEARANCE.max_call_seconds,
288
+ terms_url: stringOrDefault(safe.terms_url, DEFAULT_APPEARANCE.terms_url),
289
+ terms_label:
290
+ nonEmptyString(safe.terms_label) ?? DEFAULT_APPEARANCE.terms_label,
291
+ consent_text: stringOrDefault(
292
+ safe.consent_text,
293
+ DEFAULT_APPEARANCE.consent_text,
294
+ ),
295
+ forms: mergeForms(safe.forms),
296
+ show_audio_settings:
297
+ typeof safe.show_audio_settings === "boolean"
298
+ ? safe.show_audio_settings
299
+ : DEFAULT_APPEARANCE.show_audio_settings,
300
+ };
301
+ }
302
+
303
/**
 * Combine the built-in form definitions with server-supplied ones.
 *
 * Entries are keyed by `id`: a custom form with the same id as a built-in one
 * replaces it (keeping the built-in's position); new ids are appended.
 */
function mergeForms(value: unknown): FormDefinition[] {
  const overrides = normalizeFormDefinitions(value);
  const byId = new Map<string, FormDefinition>();
  [...DEFAULT_FORM_DEFINITIONS, ...overrides].forEach((form) => {
    byId.set(form.id, form);
  });
  return [...byId.values()];
}
310
+
311
/**
 * Validate a raw `languages` array from the server.
 *
 * Non-object entries, entries without a non-empty `code`, and repeated codes
 * are dropped (the first occurrence wins). A missing `label` becomes the
 * upper-cased code; a missing `native_label` becomes the label. If the input
 * is not an array or nothing valid remains, the default language list is
 * returned instead.
 */
function normalizeLanguages(value: unknown): AppearanceLanguage[] {
  const fallback = DEFAULT_APPEARANCE.languages;
  if (!Array.isArray(value)) return fallback;

  // A Map keeps insertion order and gives the duplicate check for free.
  const byCode = new Map<string, AppearanceLanguage>();
  for (const entry of value) {
    if (!isPlainObject(entry)) continue;

    const code = nonEmptyString(entry.code);
    if (code === undefined || byCode.has(code)) continue;

    const label = nonEmptyString(entry.label) ?? code.toUpperCase();
    byCode.set(code, {
      code,
      label,
      native_label: nonEmptyString(entry.native_label) ?? label,
    });
  }

  return byCode.size > 0 ? Array.from(byCode.values()) : fallback;
}
326
+
327
/** Inputs for fetching an agent's appearance configuration. */
export interface FetchAppearanceArgs {
  /** Fully-resolved URL (already substituted); used as-is when non-empty. */
  appearanceUrl: string;
  /** Base URL for the fallback `${apiUrl}/api/agents/${slug}/appearance/`. */
  apiUrl: string;
  /** Agent slug inserted (URL-encoded) into the fallback URL. */
  slug: string;
  /** Secret key sent as the `x-api-key` header (server/trusted mode). */
  apiKey?: string;
  /** Custom fetch (Node <18 / testing); globalThis.fetch is used otherwise. */
  fetch?: typeof fetch;
}
338
+
339
/**
 * Fetch the agent's appearance configuration and merge it over the defaults.
 *
 * The request goes to `args.appearanceUrl` when that is non-empty, otherwise
 * to `{apiUrl}/api/agents/{slug}/appearance/` (trailing slashes on `apiUrl`
 * are stripped and the slug is URL-encoded). When `apiKey` is set it is sent
 * in the `x-api-key` header.
 *
 * Fetch failures never reject: a missing fetch implementation, a non-2xx
 * status, a network error or an unparsable body all log a warning and
 * resolve to DEFAULT_APPEARANCE.
 *
 * @param args - Endpoint, credentials and optional fetch implementation.
 * @returns The merged appearance, or the defaults on any failure.
 */
export async function fetchAppearance(
  args: FetchAppearanceArgs,
): Promise<AppearanceConfig> {
  // Strip every trailing slash so "https://host//" cannot yield "//api/…".
  const baseUrl = args.apiUrl.replace(/\/+$/, "");
  const url =
    args.appearanceUrl ||
    `${baseUrl}/api/agents/${encodeURIComponent(args.slug)}/appearance/`;

  const headers: Record<string, string> = { Accept: "application/json" };
  const apiKey = args.apiKey?.trim();
  if (apiKey) headers["x-api-key"] = apiKey;

  // Prefer the injected fetch; bind the global one so it keeps its receiver.
  const doFetch =
    args.fetch ??
    (typeof globalThis.fetch === "function"
      ? globalThis.fetch.bind(globalThis)
      : null);
  if (!doFetch) {
    // eslint-disable-next-line no-console
    console.warn("[voice-agent] no fetch available; using default appearance");
    return DEFAULT_APPEARANCE;
  }

  // eslint-disable-next-line no-console
  console.info("[voice-agent] fetching appearance from", url);

  try {
    const response = await doFetch(url, { method: "GET", headers });
    if (!response.ok) {
      // eslint-disable-next-line no-console
      console.warn(
        `[voice-agent] appearance fetch returned ${response.status}; using defaults`,
      );
      return DEFAULT_APPEARANCE;
    }

    // The payload is untrusted: keep it `unknown` and let the unwrap / merge
    // steps validate it field by field.
    const body: unknown = await response.json();
    // eslint-disable-next-line no-console
    console.info("[voice-agent] appearance response", body);

    const merged = mergeAppearance(unwrapAppearanceResponse(body));
    // eslint-disable-next-line no-console
    console.info("[voice-agent] applied appearance", merged);
    return merged;
  } catch (err) {
    // eslint-disable-next-line no-console
    console.warn("[voice-agent] appearance fetch failed; using defaults:", err);
    return DEFAULT_APPEARANCE;
  }
}
387
+
388
/**
 * Locate the appearance payload inside a response body.
 *
 * The payload may be wrapped under `data`, `appearance` or
 * `widget_appearance` (checked in that order); the first of those that holds
 * a plain object wins. Otherwise the body itself is treated as the payload.
 * Returns null when the body is not a plain object.
 */
function unwrapAppearanceResponse(
  body: unknown,
): Partial<AppearanceConfig> | null {
  if (!isPlainObject(body)) return null;

  const envelopeKeys = ["data", "appearance", "widget_appearance"];
  for (const key of envelopeKeys) {
    const inner = body[key];
    if (isPlainObject(inner)) {
      return inner as Partial<AppearanceConfig>;
    }
  }

  // No recognized envelope: the body is the payload.
  return body as Partial<AppearanceConfig>;
}
407
+
408
/** Type guard: true for any non-null, non-array value whose typeof is "object". */
function isPlainObject(value: unknown): value is Record<string, unknown> {
  if (value === null || Array.isArray(value)) return false;
  return typeof value === "object";
}
411
+
412
/** Return `value` when it is a plain object, otherwise a new empty record. */
function asRecord(value: unknown): Record<string, unknown> {
  if (isPlainObject(value)) return value;
  return {};
}
415
+
416
/**
 * Return the trimmed string, or undefined when `value` is not a string or is
 * empty / whitespace-only.
 */
function nonEmptyString(value: unknown): string | undefined {
  const trimmed = typeof value === "string" ? value.trim() : "";
  return trimmed.length > 0 ? trimmed : undefined;
}
421
+
422
/**
 * Return `value` unchanged when it is a string (an empty string counts),
 * otherwise `fallback`.
 */
function stringOrDefault(value: unknown, fallback: string): string {
  if (typeof value !== "string") return fallback;
  return value;
}
425
+
426
/** Return `value` when it is a finite number greater than zero, else undefined. */
function finitePositiveNumber(value: unknown): number | undefined {
  if (typeof value !== "number") return undefined;
  if (!Number.isFinite(value) || value <= 0) return undefined;
  return value;
}
431
+
432
/**
 * Coerce a number or numeric string to a finite number >= 0.
 *
 * Strings are trimmed and converted with Number(); blank strings, values that
 * do not convert to a finite number, negatives and every other type yield
 * undefined.
 */
function nonNegativeNumber(value: unknown): number | undefined {
  let candidate: number;
  if (typeof value === "number") {
    candidate = value;
  } else if (typeof value === "string" && value.trim() !== "") {
    candidate = Number(value.trim());
  } else {
    return undefined;
  }
  return Number.isFinite(candidate) && candidate >= 0 ? candidate : undefined;
}
444
+
445
/**
 * Parse a dimension value into a positive number.
 *
 * Accepted inputs:
 *  - a finite number > 0 (returned as-is);
 *  - a decimal string with an optional, case-insensitive `px` suffix, e.g.
 *    "24", "24px", "12.5PX" (surrounding whitespace is ignored).
 *
 * Unit-less numeric strings are accepted so that backends which serialize
 * numbers as strings do not silently fall back to the default size; this
 * matches how `max_call_seconds` already tolerates numeric strings.
 *
 * @returns The parsed size, or undefined for anything else (zero, negatives,
 *   other units such as "2em" or "50%", non-string / non-number values).
 */
function pixelOrNumber(value: unknown): number | undefined {
  const direct = finitePositiveNumber(value);
  if (direct !== undefined) return direct;
  if (typeof value !== "string") return undefined;

  // Plain decimal digits only, with an optional "px" unit — no signs,
  // exponents or other CSS units.
  const match = /^(\d+(?:\.\d+)?)(?:px)?$/i.exec(value.trim());
  if (!match) return undefined;

  const parsed = Number(match[1]);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : undefined;
}
456
+
457
/**
 * Return `value` typed as a member of `allowed` when it is a string contained
 * in that list, otherwise undefined.
 */
function enumOrUndefined<T extends string>(
  allowed: readonly T[],
  value: unknown,
): T | undefined {
  if (typeof value !== "string") return undefined;
  return allowed.find((option) => option === value);
}
@@ -0,0 +1,180 @@
1
+ /**
2
+ * Per-agent audio preferences for the voice widget.
3
+ *
4
+ * Persists the user's chosen mic/speaker devices, processor toggles, and
5
+ * speaker volume so subsequent calls open with the same setup. Reads &
6
+ * writes are namespaced by agent slug so different agents can hold
7
+ * different preferences in the same browser.
8
+ */
9
+
10
/**
 * Deep-learning noise-suppression engine that runs in addition to the
 * browser's own noise suppression. Each user picks one in the audio settings
 * drawer.
 *
 * - `off`        — no extra engine; browser NS + voiceIsolation only.
 * - `krisp`      — @livekit/krisp-noise-filter (closed-source, free for LK).
 * - `deepfilter` — deepfilternet3-noise-filter
 *                  (mezonai/mezon-noise-suppression): open-source
 *                  DeepFilterNet3 with a 0-100 strength setting.
 */
export type NoiseFilterEngine = "off" | "krisp" | "deepfilter";
20
+
21
/** Audio preferences persisted for one agent in the user's browser. */
export interface AudioPrefs {
  /** Browser-native echo cancellation (MediaTrackSettings.echoCancellation). */
  echoCancellation: boolean;
  /** Browser-native noise suppression (a standard MediaTrackSettings field). */
  noiseSuppression: boolean;
  /** Browser-native automatic gain control. */
  autoGainControl: boolean;
  /** Non-standard Microsoft / Chrome hardware voice isolation. */
  voiceIsolation: boolean;
  /** Deep-learning noise-suppression processor to attach. */
  noiseFilter: NoiseFilterEngine;
  /** DeepFilterNet3 strength, 0-100; only used when noiseFilter === "deepfilter". */
  deepFilterStrength: number;
  /** deviceId of the chosen microphone; "" means the system default. */
  micDeviceId: string;
  /** deviceId of the chosen speaker for setSinkId; "" means the default. */
  speakerDeviceId: string;
  /** Speaker volume 0-100, applied as HTMLAudioElement.volume / 100. */
  outputVolume: number;
  /**
   * True turns off half-duplex ducking: with headphones on, the mic cannot
   * physically pick up the speaker, so there is no need to mute the user
   * while the agent is talking.
   */
  headphonesMode: boolean;
  /** Show live transcription in the UI. */
  transcriptionEnabled: boolean;
  /** Show the text input box during a call. */
  textInputEnabled: boolean;
}
51
+
52
/** Preferences used when nothing valid has been stored for an agent. */
export const DEFAULT_AUDIO_PREFS: AudioPrefs = {
  echoCancellation: true,
  noiseSuppression: true,
  // AGC starts disabled because it amplifies the quiet tail of an utterance.
  // That misleads the agent's contextual turn detector ("user is still
  // talking softly") and holds back preemptive LLM generation. Soft-spoken
  // users the agent can't hear may enable it per call in the audio settings
  // drawer.
  autoGainControl: false,
  voiceIsolation: true,
  noiseFilter: "deepfilter",
  deepFilterStrength: 40,
  // Empty device ids mean the system default device.
  micDeviceId: "",
  speakerDeviceId: "",
  outputVolume: 85,
  headphonesMode: true,
  transcriptionEnabled: false,
  textInputEnabled: false,
};
71
+
72
/** Prefix for the per-agent localStorage entries. */
const STORAGE_PREFIX = "voice-agent:audio-prefs:";

/** Build the localStorage key for an agent; an empty slug maps to "default". */
function storageKey(agentSlug: string): string {
  const namespace = agentSlug || "default";
  return STORAGE_PREFIX + namespace;
}
77
+
78
/**
 * Read the stored audio preferences for an agent.
 *
 * Stored fields that pass validation override the defaults. Anything that
 * goes wrong — storage unavailable, no entry, invalid JSON, a non-object
 * value — results in a fresh copy of DEFAULT_AUDIO_PREFS.
 *
 * @param agentSlug - Agent identifier used to namespace the storage key.
 * @returns A new, complete AudioPrefs object.
 */
export function loadAudioPrefs(agentSlug: string): AudioPrefs {
  let stored: Partial<AudioPrefs> = {};
  try {
    const raw = window.localStorage.getItem(storageKey(agentSlug));
    const parsed = raw ? JSON.parse(raw) : null;
    if (parsed && typeof parsed === "object") {
      stored = normalize(parsed);
    }
  } catch {
    // Unreadable storage or corrupt JSON: keep `stored` empty → defaults.
  }
  return { ...DEFAULT_AUDIO_PREFS, ...stored };
}
91
+
92
/**
 * Persist the audio preferences for an agent.
 *
 * Only validated fields are written. Errors are swallowed on purpose: when
 * storage is disabled (private mode, quota) the preferences simply do not
 * survive a reload, which is not worth surfacing to the user.
 *
 * @param agentSlug - Agent identifier used to namespace the storage key.
 * @param prefs - Preferences to store.
 */
export function saveAudioPrefs(agentSlug: string, prefs: AudioPrefs): void {
  try {
    const key = storageKey(agentSlug);
    const payload = JSON.stringify(
      normalize(prefs as unknown as Record<string, unknown>),
    );
    window.localStorage.setItem(key, payload);
  } catch {
    // Best-effort persistence; see the function comment.
  }
}
103
+
104
/**
 * Copy only the well-typed AudioPrefs fields out of an untrusted record.
 *
 * Fields with the wrong type are omitted (so the caller's defaults apply),
 * percentage fields are clamped to 0-100, and the legacy `krispEnabled`
 * boolean is mapped onto `noiseFilter` when no valid `noiseFilter` is present.
 */
function normalize(input: Record<string, unknown>): Partial<AudioPrefs> {
  const out: Partial<AudioPrefs> = {};

  const copyBoolean = (
    key:
      | "echoCancellation"
      | "noiseSuppression"
      | "autoGainControl"
      | "voiceIsolation"
      | "headphonesMode"
      | "transcriptionEnabled"
      | "textInputEnabled",
  ): void => {
    const candidate = input[key];
    if (typeof candidate === "boolean") out[key] = candidate;
  };
  const copyString = (key: "micDeviceId" | "speakerDeviceId"): void => {
    const candidate = input[key];
    if (typeof candidate === "string") out[key] = candidate;
  };
  const copyPercent = (key: "deepFilterStrength" | "outputVolume"): void => {
    const candidate = input[key];
    if (typeof candidate === "number" && Number.isFinite(candidate)) {
      out[key] = Math.max(0, Math.min(100, candidate));
    }
  };

  // The calls below run in the order in which keys are added to `out`.
  copyBoolean("echoCancellation");
  copyBoolean("noiseSuppression");
  copyBoolean("autoGainControl");
  copyBoolean("voiceIsolation");

  const engine = input.noiseFilter;
  if (engine === "off" || engine === "krisp" || engine === "deepfilter") {
    out.noiseFilter = engine;
  } else if (typeof input.krispEnabled === "boolean") {
    // Migration path from the pre-engine-picker AudioPrefs shape.
    out.noiseFilter = input.krispEnabled ? "krisp" : "off";
  }

  copyPercent("deepFilterStrength");
  copyString("micDeviceId");
  copyString("speakerDeviceId");
  copyPercent("outputVolume");
  copyBoolean("headphonesMode");
  copyBoolean("transcriptionEnabled");
  copyBoolean("textInputEnabled");

  return out;
}
129
+
130
/** Audio devices split by direction. */
export interface AudioDevices {
  /** Devices whose kind is "audioinput". */
  inputs: MediaDeviceInfo[];
  /** Devices whose kind is "audiooutput". */
  outputs: MediaDeviceInfo[];
}
134
+
135
/**
 * Enumerate audio I/O devices.
 *
 * Device labels are only populated once the user has granted mic permission
 * for the page; before that they come back empty and the caller should show
 * a placeholder.
 *
 * Resolves to empty lists when the `navigator` global or
 * `mediaDevices.enumerateDevices` is unavailable (non-browser runtimes,
 * insecure contexts, very old browsers). Errors raised by
 * `enumerateDevices()` itself still propagate to the caller.
 *
 * @returns The available audio inputs and outputs, in the order the browser
 *   reports them.
 */
export async function enumerateAudioDevices(): Promise<AudioDevices> {
  // `typeof` guard: touching an undeclared `navigator` would throw a
  // ReferenceError outside the browser.
  const mediaDevices =
    typeof navigator !== "undefined" ? navigator.mediaDevices : undefined;
  if (!mediaDevices?.enumerateDevices) {
    return { inputs: [], outputs: [] };
  }

  const devices = await mediaDevices.enumerateDevices();
  const inputs = devices.filter((d) => d.kind === "audioinput");
  const outputs = devices.filter((d) => d.kind === "audiooutput");
  return { inputs, outputs };
}
149
+
150
/** Results of browser-feature probes that decide which UI controls to offer. */
export interface AudioCapabilities {
  /** At the time of writing, Safari and Firefox do not support setSinkId. */
  setSinkIdSupported: boolean;
  /** voiceIsolation only takes effect on Chromium/Edge with system support. */
  voiceIsolationSupported: boolean;
}
157
+
158
/**
 * Probe the runtime for optional audio features.
 *
 * - `setSinkIdSupported`: HTMLAudioElement exists and its prototype has a
 *   `setSinkId` function.
 * - `voiceIsolationSupported`: `getSupportedConstraints()` reports a truthy
 *   `voiceIsolation`; any error while probing counts as unsupported.
 */
export function probeAudioCapabilities(): AudioCapabilities {
  let setSinkIdSupported = false;
  if (typeof HTMLAudioElement !== "undefined") {
    const proto = HTMLAudioElement.prototype as unknown as {
      setSinkId?: unknown;
    };
    setSinkIdSupported = typeof proto.setSinkId === "function";
  }

  // voiceIsolation has no reliable feature flag: real support only shows up
  // in MediaTrackSettings.getSettings() after the track is published. The
  // best up-front signal is getSupportedConstraints().
  const voiceIsolationSupported = ((): boolean => {
    try {
      const constraints = navigator.mediaDevices?.getSupportedConstraints?.() as
        | (MediaTrackSupportedConstraints & { voiceIsolation?: boolean })
        | undefined;
      return Boolean(constraints?.voiceIsolation);
    } catch {
      return false;
    }
  })();

  return { setSinkIdSupported, voiceIsolationSupported };
}