@oshara/voice-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +198 -0
  2. package/dist/appearance-CNWT8x1G.cjs +2 -0
  3. package/dist/appearance-CNWT8x1G.cjs.map +1 -0
  4. package/dist/appearance-i6QBkpCk.js +650 -0
  5. package/dist/appearance-i6QBkpCk.js.map +1 -0
  6. package/dist/consent-CK9VXNPa.js +54 -0
  7. package/dist/consent-CK9VXNPa.js.map +1 -0
  8. package/dist/consent-D7QNSkQD.cjs +2 -0
  9. package/dist/consent-D7QNSkQD.cjs.map +1 -0
  10. package/dist/core/analytics.d.ts +30 -0
  11. package/dist/core/appearance.d.ts +113 -0
  12. package/dist/core/audioSettings.d.ts +69 -0
  13. package/dist/core/consent.d.ts +17 -0
  14. package/dist/core/createVoiceAgent.d.ts +79 -0
  15. package/dist/core/events.d.ts +103 -0
  16. package/dist/core/formController.d.ts +28 -0
  17. package/dist/core/forms.d.ts +235 -0
  18. package/dist/core/index.d.ts +29 -0
  19. package/dist/core/prevContext.d.ts +26 -0
  20. package/dist/core/transport.d.ts +30 -0
  21. package/dist/core/types.d.ts +49 -0
  22. package/dist/core/voice.d.ts +79 -0
  23. package/dist/createVoiceAgent-BM3HODS6.js +1058 -0
  24. package/dist/createVoiceAgent-BM3HODS6.js.map +1 -0
  25. package/dist/createVoiceAgent-CJWxWzz6.cjs +4 -0
  26. package/dist/createVoiceAgent-CJWxWzz6.cjs.map +1 -0
  27. package/dist/index.cjs +2 -0
  28. package/dist/index.cjs.map +1 -0
  29. package/dist/index.js +44 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/react/index.d.ts +60 -0
  32. package/dist/react.cjs +2 -0
  33. package/dist/react.cjs.map +1 -0
  34. package/dist/react.js +115 -0
  35. package/dist/react.js.map +1 -0
  36. package/dist/styles.css +1838 -0
  37. package/dist/ui/index.d.ts +21 -0
  38. package/dist/ui/ui.d.ts +165 -0
  39. package/dist/ui.cjs +284 -0
  40. package/dist/ui.cjs.map +1 -0
  41. package/dist/ui.js +1153 -0
  42. package/dist/ui.js.map +1 -0
  43. package/package.json +67 -0
  44. package/src/core/analytics.ts +111 -0
  45. package/src/core/appearance.ts +464 -0
  46. package/src/core/audioSettings.ts +180 -0
  47. package/src/core/consent.ts +78 -0
  48. package/src/core/createVoiceAgent.ts +280 -0
  49. package/src/core/events.ts +120 -0
  50. package/src/core/formController.ts +317 -0
  51. package/src/core/forms.ts +861 -0
  52. package/src/core/index.ts +121 -0
  53. package/src/core/prevContext.ts +153 -0
  54. package/src/core/transport.ts +118 -0
  55. package/src/core/types.ts +66 -0
  56. package/src/core/voice.ts +1179 -0
  57. package/src/react/index.ts +238 -0
  58. package/src/ui/index.ts +507 -0
  59. package/src/ui/styles.css +1838 -0
  60. package/src/ui/ui.ts +1672 -0
  61. package/src/vite-env.d.ts +10 -0
@@ -0,0 +1,78 @@
1
+ /**
2
+ * Per-agent localStorage record of the user's acceptance of the terms &
3
+ * conditions presented in the welcome screen. Starting a call implicitly
4
+ * accepts the terms; we persist that acceptance so we can show a quieter
5
+ * "accepted" state on return visits and have an audit trail of which
6
+ * terms_url version was accepted.
7
+ */
8
+
9
const STORAGE_PREFIX = "voiceAgent.consent";

/** Shape of the per-agent consent entry persisted in localStorage. */
export interface ConsentRecord {
  /** ms-since-epoch when the user first accepted. */
  acceptedAt: number;
  /** The terms_url that was in effect at acceptance time. */
  termsUrl: string;
}

/** Builds the localStorage key for one agent: `<prefix>.<agentSlug>`. */
function storageKey(agentSlug: string): string {
  return `${STORAGE_PREFIX}.${agentSlug}`;
}

/**
 * Returns `window.localStorage`, or null when touching it throws (for example
 * when `window` does not exist, or the browser blocks storage access).
 */
function safeLocalStorage(): Storage | null {
  try {
    return window.localStorage;
  } catch {
    return null;
  }
}

/**
 * Reads and validates the stored consent record for an agent.
 *
 * @param agentSlug Agent whose record should be read; an empty slug yields null.
 * @returns The record, or null when storage is unavailable, nothing is stored,
 *   the stored text is not valid JSON, or `acceptedAt` is not a positive,
 *   finite number. A missing or non-string `termsUrl` is read as "".
 */
export function loadConsent(agentSlug: string): ConsentRecord | null {
  if (!agentSlug) return null;
  const ls = safeLocalStorage();
  if (!ls) return null;

  let raw: string | null = null;
  try {
    raw = ls.getItem(storageKey(agentSlug));
  } catch {
    return null;
  }
  if (!raw) return null;

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return null;
  }
  if (!parsed || typeof parsed !== "object") return null;

  const { acceptedAt, termsUrl } = parsed as {
    acceptedAt?: unknown;
    termsUrl?: unknown;
  };
  // JSON such as `1e999` parses to Infinity, so a typeof check alone is not
  // enough to guarantee a usable timestamp.
  if (
    typeof acceptedAt !== "number" ||
    !Number.isFinite(acceptedAt) ||
    acceptedAt <= 0
  ) {
    return null;
  }
  return { acceptedAt, termsUrl: typeof termsUrl === "string" ? termsUrl : "" };
}

/** Returns true if the user has accepted the *current* terms_url before. */
export function hasAcceptedTerms(agentSlug: string, termsUrl: string): boolean {
  const record = loadConsent(agentSlug);
  if (!record) return false;
  // If the link has changed since acceptance, require fresh acknowledgement.
  return record.termsUrl === termsUrl;
}

/**
 * Persists the user's acceptance of `termsUrl` for an agent.
 *
 * When a valid record for the same `termsUrl` already exists it is left
 * untouched, so `acceptedAt` keeps meaning "first accepted" instead of being
 * pushed forward on every call. A different `termsUrl` replaces the record
 * with a fresh timestamp.
 *
 * @param agentSlug Agent the acceptance belongs to; an empty slug is a no-op.
 * @param termsUrl The terms link being accepted.
 */
export function saveConsent(agentSlug: string, termsUrl: string): void {
  if (!agentSlug) return;
  const ls = safeLocalStorage();
  if (!ls) return;

  const existing = loadConsent(agentSlug);
  if (existing !== null && existing.termsUrl === termsUrl) return;

  const record: ConsentRecord = {
    acceptedAt: Date.now(),
    termsUrl,
  };
  try {
    ls.setItem(storageKey(agentSlug), JSON.stringify(record));
  } catch {
    // Quota or privacy mode — silently drop.
  }
}
@@ -0,0 +1,280 @@
1
+ /**
2
+ * createVoiceAgent — the headless entry point of the SDK.
3
+ *
4
+ * A developer adds their agent slug (and optionally an API key) and everything
5
+ * initializes exactly like the widget: appearance is fetched, the voice
6
+ * controller + form controller are wired, agent-triggered forms surface as
7
+ * events. Build a fully custom UI on the events, or mount the prebuilt UI from
8
+ * `@oshara/voice-sdk/ui`.
9
+ */
10
+
11
+ import { trackWidgetEvent } from "./analytics";
12
+ import {
13
+ AppearanceConfig,
14
+ DEFAULT_APPEARANCE,
15
+ fetchAppearance,
16
+ } from "./appearance";
17
+ import {
18
+ AudioPrefs,
19
+ AudioCapabilities,
20
+ AudioDevices,
21
+ DEFAULT_AUDIO_PREFS,
22
+ enumerateAudioDevices,
23
+ loadAudioPrefs,
24
+ probeAudioCapabilities,
25
+ } from "./audioSettings";
26
+ import { Emitter, EventHandler, EventName } from "./events";
27
+ import {
28
+ createFormController,
29
+ FormController,
30
+ } from "./formController";
31
+ import {
32
+ FormDefinition,
33
+ extractFormDraft,
34
+ handleFormAction,
35
+ matchForm,
36
+ } from "./forms";
37
+ import { fetchSession, warnIfBrowserSecret } from "./transport";
38
+ import type { DeepFilterUrls } from "./types";
39
+ import {
40
+ AudioStateSnapshot,
41
+ AudioStats,
42
+ createVoiceController,
43
+ VoiceController,
44
+ } from "./voice";
45
+
46
+ export interface VoiceAgentConfig {
47
+ /** AICharacter.slug on the backend. Required. */
48
+ agentSlug: string;
49
+ /** Backend base URL. Default "https://api.oshara.ai". */
50
+ apiUrl?: string;
51
+ /**
52
+ * Secret API key (sk_…) sent as `x-api-key`, bypassing origin gating.
53
+ * OMIT for public browser embeds (rely on the origin allow-list). Intended
54
+ * for server-side / Node usage; warns if used in a browser.
55
+ */
56
+ apiKey?: string;
57
+ /** Override the appearance endpoint URL. */
58
+ appearanceUrl?: string;
59
+ /** UI language (BCP-47 short). Default "en". */
60
+ language?: string;
61
+ /** DeepFilterNet3 asset overrides (same defaults/precedence as the widget). */
62
+ deepFilter?: DeepFilterUrls;
63
+ /** Auto-fetch appearance during init() (default true). */
64
+ fetchAppearanceOnInit?: boolean;
65
+ /** Disable analytics events (default false). */
66
+ disableAnalytics?: boolean;
67
+ /** Custom fetch (Node <18 / testing). Defaults to globalThis.fetch. */
68
+ fetch?: typeof fetch;
69
+ /**
70
+ * Seed audio preferences. Merged over DEFAULT_AUDIO_PREFS and (when
71
+ * persistAudioPrefs) the stored per-agent prefs.
72
+ */
73
+ audio?: Partial<AudioPrefs>;
74
+ /** Read/write audio prefs to localStorage (default true). */
75
+ persistAudioPrefs?: boolean;
76
+ }
77
+
78
+ export interface VoiceAgentClient {
79
+ /** The AICharacter slug this client targets. */
80
+ readonly agentSlug: string;
81
+ /** Fetch appearance (if enabled) and prepare the agent. Idempotent. */
82
+ init: () => Promise<{ appearance: AppearanceConfig }>;
83
+ /** Start a call. */
84
+ start: () => Promise<void>;
85
+ /** End the call. */
86
+ end: () => Promise<void>;
87
+ /** Tear down listeners + any active call. */
88
+ destroy: () => void;
89
+
90
+ toggleMute: () => Promise<boolean>;
91
+ isActive: () => boolean;
92
+ sessionId: () => string | null;
93
+ /** Send a typed text message to the agent (also emits a transcript line). */
94
+ sendText: (text: string) => Promise<void>;
95
+ publishData: (payload: unknown, topic: string) => Promise<void>;
96
+
97
+ updateAudioSettings: (
98
+ delta: Partial<AudioPrefs>,
99
+ ) => Promise<AudioStateSnapshot>;
100
+ getAudioState: () => AudioStateSnapshot;
101
+ getAudioStats: () => Promise<AudioStats | null>;
102
+ enumerateAudioDevices: () => Promise<AudioDevices>;
103
+ getAudioCapabilities: () => AudioCapabilities;
104
+
105
+ // ── forms ──
106
+ getActiveForm: () => ReturnType<FormController["getActive"]>;
107
+ /** Push on-screen form edits into the model (custom UIs call this on input). */
108
+ updateFormValues: (values: Record<string, string>) => void;
109
+ stepForm: (direction: "next" | "back" | number) => void;
110
+ submitForm: () => void;
111
+ closeForm: () => void;
112
+ /** Programmatically open a form. */
113
+ openForm: (definition: FormDefinition, draft?: Record<string, string>) => void;
114
+
115
+ getAppearance: () => AppearanceConfig;
116
+ setLanguage: (code: string) => void;
117
+ getLanguage: () => string;
118
+
119
+ /** Report an engagement event (widget_loaded / bubble_clicked). */
120
+ trackEvent: (
121
+ type: "widget_loaded" | "bubble_clicked",
122
+ metadata?: Record<string, unknown>,
123
+ ) => void;
124
+
125
+ on: <K extends EventName>(event: K, handler: EventHandler<K>) => () => void;
126
+ off: <K extends EventName>(event: K, handler: EventHandler<K>) => void;
127
+ }
128
+
129
+ const DEFAULT_API_URL = "https://api.oshara.ai";
130
+
131
+ export function createVoiceAgent(config: VoiceAgentConfig): VoiceAgentClient {
132
+ if (!config.agentSlug) {
133
+ // eslint-disable-next-line no-console
134
+ console.error("[voice-agent] agentSlug is required");
135
+ }
136
+ warnIfBrowserSecret(config.apiKey);
137
+
138
+ const apiUrl = config.apiUrl ?? DEFAULT_API_URL;
139
+ const persistAudioPrefs = config.persistAudioPrefs ?? true;
140
+ let language = config.language ?? "en";
141
+
142
+ const emitter = new Emitter();
143
+ const emit = emitter.emit;
144
+
145
+ // Resolve audio prefs: defaults < stored (if persisting) < explicit config.
146
+ const initialPrefs: AudioPrefs = {
147
+ ...DEFAULT_AUDIO_PREFS,
148
+ ...(persistAudioPrefs ? loadAudioPrefs(config.agentSlug) : {}),
149
+ ...(config.audio ?? {}),
150
+ };
151
+
152
+ let appearance: AppearanceConfig = DEFAULT_APPEARANCE;
153
+ const getAppearance = () => appearance;
154
+
155
+ const voice: VoiceController = createVoiceController({
156
+ fetchSession: () =>
157
+ fetchSession({
158
+ apiUrl,
159
+ agentSlug: config.agentSlug,
160
+ language,
161
+ apiKey: config.apiKey,
162
+ fetch: config.fetch,
163
+ }),
164
+ agentSlug: config.agentSlug,
165
+ getAppearance,
166
+ emit,
167
+ deepFilter: config.deepFilter,
168
+ initialPrefs,
169
+ persistPrefs: persistAudioPrefs,
170
+ });
171
+
172
+ const forms: FormController = createFormController({
173
+ emit,
174
+ voice,
175
+ apiUrl,
176
+ agentSlug: config.agentSlug,
177
+ apiKey: config.apiKey,
178
+ fetch: config.fetch,
179
+ });
180
+
181
+ // Route agent data messages → handoff / form actions / form render.
182
+ emitter.on("data", ({ data, topic }) => {
183
+ if (topic === "voice.agent_handoff") {
184
+ const agentName =
185
+ data && typeof data === "object" &&
186
+ typeof (data as { agent_name?: unknown }).agent_name === "string"
187
+ ? ((data as { agent_name: string }).agent_name || "").trim()
188
+ : "";
189
+ emit("agent:handoff", { agentName });
190
+ return;
191
+ }
192
+ // Agent-driven step / submit / close. Must run before matchForm.
193
+ if (handleFormAction(topic, data, forms)) return;
194
+
195
+ const match = matchForm(topic, data, getAppearance().forms);
196
+ if (!match) return;
197
+ const draft = extractFormDraft(data, match) ?? {};
198
+ if (forms.current() === match.id) forms.merge(draft);
199
+ else forms.open(match, draft);
200
+ });
201
+
202
+ const trackEvent = (
203
+ type: "widget_loaded" | "bubble_clicked",
204
+ metadata: Record<string, unknown> = {},
205
+ ) =>
206
+ trackWidgetEvent(
207
+ {
208
+ apiUrl,
209
+ agentSlug: config.agentSlug,
210
+ apiKey: config.apiKey,
211
+ fetch: config.fetch,
212
+ disabled: config.disableAnalytics,
213
+ },
214
+ type,
215
+ metadata,
216
+ );
217
+
218
+ let initialized = false;
219
+ const init = async (): Promise<{ appearance: AppearanceConfig }> => {
220
+ if (!initialized && (config.fetchAppearanceOnInit ?? true)) {
221
+ appearance = await fetchAppearance({
222
+ appearanceUrl: config.appearanceUrl ?? "",
223
+ apiUrl,
224
+ slug: config.agentSlug,
225
+ apiKey: config.apiKey,
226
+ fetch: config.fetch,
227
+ });
228
+ }
229
+ initialized = true;
230
+ emit("appearance", appearance);
231
+ return { appearance };
232
+ };
233
+
234
+ const destroy = () => {
235
+ if (voice.isActive()) void voice.end();
236
+ emitter.clear();
237
+ };
238
+
239
+ return {
240
+ agentSlug: config.agentSlug,
241
+ init,
242
+ start: voice.start,
243
+ end: voice.end,
244
+ destroy,
245
+ toggleMute: voice.toggleMute,
246
+ isActive: voice.isActive,
247
+ sessionId: voice.sessionId,
248
+ sendText: async (text: string) => {
249
+ const t = (text || "").trim();
250
+ if (!t || !voice.isActive()) return;
251
+ emit("transcript", {
252
+ role: "user",
253
+ segmentId: `typed-${Date.now()}`,
254
+ text: t,
255
+ isFinal: true,
256
+ });
257
+ await voice.publishData({ type: "user_text", text: t }, "voice.user_text");
258
+ },
259
+ publishData: voice.publishData,
260
+ updateAudioSettings: voice.updateAudioSettings,
261
+ getAudioState: voice.getAudioState,
262
+ getAudioStats: voice.getAudioStats,
263
+ enumerateAudioDevices,
264
+ getAudioCapabilities: probeAudioCapabilities,
265
+ getActiveForm: forms.getActive,
266
+ updateFormValues: forms.updateValues,
267
+ stepForm: forms.step,
268
+ submitForm: forms.submit,
269
+ closeForm: forms.close,
270
+ openForm: forms.open,
271
+ getAppearance,
272
+ setLanguage: (code: string) => {
273
+ language = code || "en";
274
+ },
275
+ getLanguage: () => language,
276
+ trackEvent,
277
+ on: emitter.on.bind(emitter),
278
+ off: emitter.off.bind(emitter),
279
+ };
280
+ }
@@ -0,0 +1,120 @@
1
+ /**
2
+ * Typed event emitter for the voice-agent SDK.
3
+ *
4
+ * The headless core publishes everything that the widget used to do via direct
5
+ * UI calls (orb state, transcripts, call status, mute, forms, …) as typed
6
+ * events. The prebuilt UI subscribes to these and renders; custom UIs do the
7
+ * same. This is the seam that decouples logic from presentation.
8
+ */
9
+
10
+ import type { AppearanceConfig } from "./appearance";
11
+ import type { FormDefinition, FieldValidationError } from "./forms";
12
+ import type { AudioStateSnapshot } from "./voice";
13
+ import type { OrbState, ConnectionPhase, ControlsState } from "./types";
14
+
15
/** Maps every event name the SDK can emit to the payload its listeners receive. */
export interface VoiceAgentEvents {
  /** Orb state machine value plus a contextual status line (e.g. "Searching…"). */
  state: { orb: OrbState; statusLabel: string | null };
  /** Free-form call status text: Connecting…, Connected, Muted, errors. */
  "call:status": { status: string };
  /** Call time left in ms; null means the timer should be hidden. */
  "call:timer": { remainingMs: number | null };
  /** Connection lifecycle; the UI uses it to pick which screen to show. */
  connection: { phase: ConnectionPhase; error?: string };
  /** Which call controls are currently enabled. */
  controls: ControlsState;
  /** One transcript segment, interim or final, from either party. */
  transcript: {
    role: "user" | "agent";
    segmentId: string;
    text: string;
    isFinal: boolean;
  };
  /** Wipe all transcript content (a new call is starting). */
  "transcript:clear": Record<string, never>;
  /** A system transcript line, e.g. an agent handoff transition message. */
  "transcript:system": { text: string };
  /** The mute state changed. */
  mute: { muted: boolean };
  /** The audio preferences / applied settings snapshot changed. */
  audio: AudioStateSnapshot;
  /** Raw JSON data message received from the agent over LiveKit. */
  data: { data: unknown; topic: string | undefined };
  /** A form should be displayed, whether agent-triggered or programmatic. */
  "form:show": {
    definition: FormDefinition;
    draft: Record<string, string>;
    stepIndex: number;
    inCall: boolean;
    transcriptionEnabled: boolean;
  };
  /** Form values or the current step changed (agent merge or step navigation). */
  "form:update": { values: Record<string, string>; stepIndex: number };
  /** Client-side validation failed; these errors should be rendered inline. */
  "form:validation": { errors: FieldValidationError[] };
  /** A submit POST is currently in flight. */
  "form:submitting": Record<string, never>;
  /** A form submission succeeded. */
  "form:submitted": {
    formId: string;
    values: Record<string, string>;
    successMessage: string;
  };
  /** A submit POST failed. */
  "form:error": { message: string };
  /** The active form was closed. */
  "form:close": Record<string, never>;
  /** The call was handed off to another agent mid-call. */
  "agent:handoff": { agentName: string };
  /** The appearance config was resolved (after the init fetch). */
  appearance: AppearanceConfig;
  /** A non-fatal error happened in the named scope. */
  error: { scope: string; error: Error };
}
74
+
75
/** Any event name declared on `VoiceAgentEvents`. */
export type EventName = keyof VoiceAgentEvents;

/** A listener for event `K`; it receives that event's payload. */
export type EventHandler<K extends EventName> = (
  payload: VoiceAgentEvents[K],
) => void;

/** Signature of the emit function: an event name plus its matching payload. */
export type Emit = <K extends EventName>(
  event: K,
  payload: VoiceAgentEvents[K],
) => void;
83
+
84
/**
 * Small typed publish/subscribe hub with no dependencies.
 *
 * Handlers are kept in one Set per event name, so registering the same
 * function twice for an event has no extra effect.
 */
export class Emitter {
  private listeners = new Map<EventName, Set<EventHandler<EventName>>>();

  /**
   * Subscribes `handler` to `event`.
   *
   * @returns A function that removes this subscription again.
   */
  on<K extends EventName>(event: K, handler: EventHandler<K>): () => void {
    const existing = this.listeners.get(event);
    const bucket = existing ?? new Set<EventHandler<EventName>>();
    if (!existing) {
      this.listeners.set(event, bucket);
    }
    bucket.add(handler as EventHandler<EventName>);
    return () => {
      this.off(event, handler);
    };
  }

  /** Unsubscribes `handler` from `event`; does nothing if it was not registered. */
  off<K extends EventName>(event: K, handler: EventHandler<K>): void {
    const bucket = this.listeners.get(event);
    if (bucket) {
      bucket.delete(handler as EventHandler<EventName>);
    }
  }

  /**
   * Delivers `payload` to every handler registered for `event`.
   *
   * Declared as an arrow-function property so it keeps working when it is
   * detached from the instance and passed around on its own.
   */
  emit: Emit = (event, payload) => {
    const bucket = this.listeners.get(event);
    if (!bucket) return;
    // Dispatch over a snapshot so handlers may unsubscribe while we iterate.
    [...bucket].forEach((handler) => {
      try {
        (handler as EventHandler<typeof event>)(payload);
      } catch (err) {
        // One misbehaving listener must not interrupt the remaining dispatch.
        // eslint-disable-next-line no-console
        console.error(`[voice-agent] listener for "${String(event)}" threw`, err);
      }
    });
  };

  /** Drops every handler for every event. */
  clear(): void {
    this.listeners.clear();
  }
}