@oshara/voice-sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. package/README.md +198 -0
  2. package/dist/appearance-CNWT8x1G.cjs +2 -0
  3. package/dist/appearance-CNWT8x1G.cjs.map +1 -0
  4. package/dist/appearance-i6QBkpCk.js +650 -0
  5. package/dist/appearance-i6QBkpCk.js.map +1 -0
  6. package/dist/consent-CK9VXNPa.js +54 -0
  7. package/dist/consent-CK9VXNPa.js.map +1 -0
  8. package/dist/consent-D7QNSkQD.cjs +2 -0
  9. package/dist/consent-D7QNSkQD.cjs.map +1 -0
  10. package/dist/core/analytics.d.ts +30 -0
  11. package/dist/core/appearance.d.ts +113 -0
  12. package/dist/core/audioSettings.d.ts +69 -0
  13. package/dist/core/consent.d.ts +17 -0
  14. package/dist/core/createVoiceAgent.d.ts +79 -0
  15. package/dist/core/events.d.ts +103 -0
  16. package/dist/core/formController.d.ts +28 -0
  17. package/dist/core/forms.d.ts +235 -0
  18. package/dist/core/index.d.ts +29 -0
  19. package/dist/core/prevContext.d.ts +26 -0
  20. package/dist/core/transport.d.ts +30 -0
  21. package/dist/core/types.d.ts +49 -0
  22. package/dist/core/voice.d.ts +79 -0
  23. package/dist/createVoiceAgent-BM3HODS6.js +1058 -0
  24. package/dist/createVoiceAgent-BM3HODS6.js.map +1 -0
  25. package/dist/createVoiceAgent-CJWxWzz6.cjs +4 -0
  26. package/dist/createVoiceAgent-CJWxWzz6.cjs.map +1 -0
  27. package/dist/index.cjs +2 -0
  28. package/dist/index.cjs.map +1 -0
  29. package/dist/index.js +44 -0
  30. package/dist/index.js.map +1 -0
  31. package/dist/react/index.d.ts +60 -0
  32. package/dist/react.cjs +2 -0
  33. package/dist/react.cjs.map +1 -0
  34. package/dist/react.js +115 -0
  35. package/dist/react.js.map +1 -0
  36. package/dist/styles.css +1838 -0
  37. package/dist/ui/index.d.ts +21 -0
  38. package/dist/ui/ui.d.ts +165 -0
  39. package/dist/ui.cjs +284 -0
  40. package/dist/ui.cjs.map +1 -0
  41. package/dist/ui.js +1153 -0
  42. package/dist/ui.js.map +1 -0
  43. package/package.json +67 -0
  44. package/src/core/analytics.ts +111 -0
  45. package/src/core/appearance.ts +464 -0
  46. package/src/core/audioSettings.ts +180 -0
  47. package/src/core/consent.ts +78 -0
  48. package/src/core/createVoiceAgent.ts +280 -0
  49. package/src/core/events.ts +120 -0
  50. package/src/core/formController.ts +317 -0
  51. package/src/core/forms.ts +861 -0
  52. package/src/core/index.ts +121 -0
  53. package/src/core/prevContext.ts +153 -0
  54. package/src/core/transport.ts +118 -0
  55. package/src/core/types.ts +66 -0
  56. package/src/core/voice.ts +1179 -0
  57. package/src/react/index.ts +238 -0
  58. package/src/ui/index.ts +507 -0
  59. package/src/ui/styles.css +1838 -0
  60. package/src/ui/ui.ts +1672 -0
  61. package/src/vite-env.d.ts +10 -0
@@ -0,0 +1,121 @@
1
+ /**
2
+ * @oshara/voice-sdk — headless core.
3
+ *
4
+ * `import { createVoiceAgent } from "@oshara/voice-sdk"` pulls in zero DOM/UI
5
+ * code. Build a custom UI on the events, or mount the prebuilt UI from
6
+ * `@oshara/voice-sdk/ui`.
7
+ */
8
+
9
+ export { createVoiceAgent } from "./createVoiceAgent";
10
+ export type { VoiceAgentConfig, VoiceAgentClient } from "./createVoiceAgent";
11
+
12
+ // Events
13
+ export { Emitter } from "./events";
14
+ export type {
15
+ VoiceAgentEvents,
16
+ EventName,
17
+ EventHandler,
18
+ Emit,
19
+ } from "./events";
20
+
21
+ // Core types
22
+ export type {
23
+ OrbState,
24
+ SessionInit,
25
+ ConnectionPhase,
26
+ ControlsState,
27
+ DeepFilterUrls,
28
+ } from "./types";
29
+ export { DEFAULT_DEEPFILTER_MODULE_URL } from "./types";
30
+
31
+ // Voice controller (advanced / direct use)
32
+ export {
33
+ createVoiceController,
34
+ } from "./voice";
35
+ export type {
36
+ VoiceController,
37
+ VoiceControllerOptions,
38
+ AudioStateSnapshot,
39
+ AudioStats,
40
+ } from "./voice";
41
+
42
+ // Forms
43
+ export {
44
+ createFormController,
45
+ } from "./formController";
46
+ export type { FormController } from "./formController";
47
+ export {
48
+ buildFieldSchema,
49
+ collectInputFields,
50
+ extractFormDraft,
51
+ fieldsForStep,
52
+ handleFormAction,
53
+ initialFormValues,
54
+ matchForm,
55
+ mergeFormDraft,
56
+ normalizeFormDefinitions,
57
+ submitForm,
58
+ totalSteps,
59
+ validateFields,
60
+ DEFAULT_FORM_DEFINITIONS,
61
+ } from "./forms";
62
+ export type {
63
+ FormDefinition,
64
+ FormFieldDef,
65
+ FormFieldType,
66
+ FormFieldOption,
67
+ FormStep,
68
+ FormLayout,
69
+ FormStateSnapshot,
70
+ FieldValidationError,
71
+ } from "./forms";
72
+
73
+ // Appearance
74
+ export {
75
+ fetchAppearance,
76
+ mergeAppearance,
77
+ DEFAULT_APPEARANCE,
78
+ } from "./appearance";
79
+ export type {
80
+ AppearanceConfig,
81
+ AppearanceTheme,
82
+ AppearanceDimensions,
83
+ AppearanceLayout,
84
+ AppearanceLabels,
85
+ AppearanceLanguage,
86
+ WidgetPosition,
87
+ } from "./appearance";
88
+
89
+ // Audio settings
90
+ export {
91
+ DEFAULT_AUDIO_PREFS,
92
+ loadAudioPrefs,
93
+ saveAudioPrefs,
94
+ enumerateAudioDevices,
95
+ probeAudioCapabilities,
96
+ } from "./audioSettings";
97
+ export type {
98
+ AudioPrefs,
99
+ NoiseFilterEngine,
100
+ AudioDevices,
101
+ AudioCapabilities,
102
+ } from "./audioSettings";
103
+
104
+ // Persistence helpers
105
+ export {
106
+ loadPrevContext,
107
+ savePrevContext,
108
+ clearPrevContext,
109
+ formatPrevContextForAgent,
110
+ } from "./prevContext";
111
+ export type { PrevTurn } from "./prevContext";
112
+ export { loadConsent, saveConsent, hasAcceptedTerms } from "./consent";
113
+ export { trackWidgetEvent, getVisitorId } from "./analytics";
114
+
115
+ // Transport
116
+ export {
117
+ buildHeaders,
118
+ fetchSession,
119
+ warnIfBrowserSecret,
120
+ } from "./transport";
121
+ export type { TransportConfig, FetchSessionArgs } from "./transport";
@@ -0,0 +1,153 @@
1
+ /**
2
+ * Per-agent storage for the most-recent conversation transcript, so the
3
+ * agent can be primed with prior context when the user returns.
4
+ *
5
+ * The widget stores the *raw last turns* and never summarizes them itself —
6
+ * the agent-side LLM is perfectly capable of using the transcript as
7
+ * context. An agent-supplied summary, when provided, is stored alongside them.
8
+ */
9
+
10
/** Namespace for the localStorage keys; the agent slug is appended to it. */
const STORAGE_PREFIX = "voiceAgent.prevContext";

/** Cap on the combined text length of all persisted turns. */
const MAX_STORED_CHARS = 4000;

/** Cap on how many of the most recent turns are persisted. */
const MAX_STORED_TURNS = 30;

/** Cap on the text length of a single persisted turn. */
const MAX_TURN_CHARS = 500;

/** Cap on the length of the persisted summary. */
const MAX_SUMMARY_CHARS = 1500;

/** Saved context older than this (14 days) is discarded, so stale
 * conversations don't haunt new ones. */
const STALE_AFTER_MS = 1000 * 60 * 60 * 24 * 14;
17
+
18
/** A single stored utterance together with the side that produced it. */
export interface PrevTurn {
  /** The utterance text. */
  text: string;
  /** Who spoke: the end user or the agent. */
  role: "user" | "agent";
}
22
+
23
/** The JSON payload persisted per agent in localStorage. */
export interface PrevContextRecord {
  /** `Date.now()` timestamp taken when the record was written. */
  savedAt: number;
  /** Stored conversation turns, oldest first. */
  turns: PrevTurn[];
  /**
   * Agent-generated short user-profile summary. When non-empty it is
   * preferred over the raw turns for priming the next session.
   */
  summary?: string;
}
30
+
31
/** Build the localStorage key under which `agentSlug`'s context is kept. */
function storageKey(agentSlug: string): string {
  return STORAGE_PREFIX + "." + agentSlug;
}
34
+
35
/**
 * Return `window.localStorage`, or `null` when merely touching it throws
 * (for example when `window` does not exist).
 */
function safeLocalStorage(): Storage | null {
  let store: Storage | null = null;
  try {
    store = window.localStorage;
  } catch {
    store = null;
  }
  return store;
}
42
+
43
/**
 * Read and validate the saved context for `agentSlug`.
 *
 * Everything read from localStorage is treated as untrusted: malformed
 * turns are skipped, and the same size limits used when saving
 * (`MAX_TURN_CHARS`, `MAX_STORED_TURNS`, `MAX_STORED_CHARS`,
 * `MAX_SUMMARY_CHARS`) are re-applied so an oversized or hand-edited record
 * cannot be handed to the agent unbounded.
 *
 * @param agentSlug Agent identifier used to build the storage key.
 * @returns The sanitized record, or `null` when storage is unavailable,
 *   nothing usable is stored, or the record is stale. A stale record (or one
 *   with a missing / non-finite `savedAt`) is also removed from storage.
 */
export function loadPrevContext(agentSlug: string): PrevContextRecord | null {
  if (!agentSlug) return null;
  const ls = safeLocalStorage();
  if (!ls) return null;

  let parsed: unknown;
  try {
    const raw = ls.getItem(storageKey(agentSlug));
    if (!raw) return null;
    parsed = JSON.parse(raw);
  } catch {
    // Storage read failed or the payload is not valid JSON.
    return null;
  }
  if (!parsed || typeof parsed !== "object") return null;

  const obj = parsed as Partial<PrevContextRecord>;
  // `1e999` parses to Infinity, which would otherwise never be considered stale.
  const savedAt =
    typeof obj.savedAt === "number" && Number.isFinite(obj.savedAt) ? obj.savedAt : 0;
  if (!savedAt || Date.now() - savedAt > STALE_AFTER_MS) {
    clearPrevContext(agentSlug);
    return null;
  }

  const valid: PrevTurn[] = [];
  if (Array.isArray(obj.turns)) {
    for (const t of obj.turns as unknown[]) {
      if (!t || typeof t !== "object") continue;
      const { role, text } = t as Partial<PrevTurn>;
      if ((role !== "user" && role !== "agent") || typeof text !== "string") continue;
      const cleaned = text.trim().slice(0, MAX_TURN_CHARS);
      if (!cleaned) continue;
      valid.push({ role, text: cleaned });
    }
  }

  // Keep the most recent turns, then drop from the front until the total
  // fits the character budget (always keeping at least one turn).
  const turns = valid.slice(-MAX_STORED_TURNS);
  let totalChars = turns.reduce((n, t) => n + t.text.length, 0);
  while (turns.length > 1 && totalChars > MAX_STORED_CHARS) {
    const dropped = turns.shift();
    if (dropped) totalChars -= dropped.text.length;
  }

  const summary =
    typeof obj.summary === "string" ? obj.summary.trim().slice(0, MAX_SUMMARY_CHARS) : "";
  if (!turns.length && !summary) return null;
  return { savedAt, turns, summary };
}
88
+
89
/**
 * Cut `text` to at most `max` UTF-16 code units without ending on a dangling
 * high surrogate (which a plain `slice` produces when the cut lands inside
 * an astral character such as an emoji).
 */
function truncateOnCodePoint(text: string, max: number): string {
  if (text.length <= max) return text;
  const last = text.charCodeAt(max - 1);
  const end = last >= 0xd800 && last <= 0xdbff ? max - 1 : max;
  return text.slice(0, end);
}

/**
 * Persist the most recent conversation turns (and optional summary) for
 * `agentSlug`. Best-effort: storage being unavailable or full is ignored.
 *
 * Turns are validated the same way `loadPrevContext` validates them (role
 * must be "user"/"agent", text must be a non-blank string), so entries that
 * would be discarded on load do not consume the turn/character budget.
 * If nothing is left to store, any previously saved record is cleared.
 *
 * @param agentSlug Agent identifier used to build the storage key.
 * @param turns Conversation turns, oldest first. A non-array is treated as empty.
 * @param summary Optional summary; trimmed and capped at `MAX_SUMMARY_CHARS`.
 */
export function savePrevContext(
  agentSlug: string,
  turns: PrevTurn[],
  summary?: string,
): void {
  if (!agentSlug) return;
  const ls = safeLocalStorage();
  if (!ls) return;

  // Trim per-turn, drop empties/malformed, keep most recent MAX_STORED_TURNS,
  // then truncate from the front until we fit MAX_STORED_CHARS.
  const cleaned: PrevTurn[] = [];
  for (const t of Array.isArray(turns) ? turns : []) {
    if (!t || (t.role !== "user" && t.role !== "agent") || typeof t.text !== "string") {
      continue;
    }
    const text = truncateOnCodePoint(t.text.trim(), MAX_TURN_CHARS);
    if (!text) continue;
    cleaned.push({ role: t.role, text });
  }
  const recent = cleaned.slice(-MAX_STORED_TURNS);

  let totalChars = recent.reduce((n, t) => n + t.text.length, 0);
  while (recent.length > 1 && totalChars > MAX_STORED_CHARS) {
    const dropped = recent.shift();
    if (dropped) totalChars -= dropped.text.length;
  }

  const trimmedSummary =
    typeof summary === "string" ? truncateOnCodePoint(summary.trim(), MAX_SUMMARY_CHARS) : "";
  if (!recent.length && !trimmedSummary) {
    clearPrevContext(agentSlug);
    return;
  }

  const record: PrevContextRecord = {
    savedAt: Date.now(),
    turns: recent,
    summary: trimmedSummary,
  };
  try {
    ls.setItem(storageKey(agentSlug), JSON.stringify(record));
  } catch {
    // Quota or privacy mode — silently drop.
  }
}
131
+
132
/**
 * Remove the saved context for `agentSlug`. Does nothing for an empty slug
 * or when storage is unavailable; removal errors are swallowed.
 */
export function clearPrevContext(agentSlug: string): void {
  const store = agentSlug ? safeLocalStorage() : null;
  if (!store) return;
  try {
    store.removeItem(storageKey(agentSlug));
  } catch {
    // ignore
  }
}
142
+
143
/**
 * Render a stored record as text for the agent's system prompt.
 *
 * A non-blank summary wins and is returned under a
 * "User profile from previous session:" heading. Otherwise the turns are
 * rendered one per line, prefixed with "User:" or "Assistant:".
 */
export function formatPrevContextForAgent(record: PrevContextRecord): string {
  const profile = (record.summary || "").trim();
  if (profile) {
    return `User profile from previous session:\n${profile}`;
  }

  const renderTurn = (turn: PrevContextRecord["turns"][number]): string => {
    if (turn.role === "user") return `User: ${turn.text}`;
    return `Assistant: ${turn.text}`;
  };
  return record.turns.map(renderTurn).join("\n");
}
@@ -0,0 +1,118 @@
1
+ /**
2
+ * HTTP transport helpers shared by every backend call the SDK makes
3
+ * (session minting, appearance, form submit, analytics).
4
+ *
5
+ * Two auth modes (see docs/Authentication):
6
+ * - Public browser embed (default): no key. The backend gates on the
7
+ * AICharacter's origin allow-list. This is exactly how the widget works.
8
+ * - Trusted/server mode: an `sk_…` key sent as `x-api-key`, bypassing origin
9
+ * gating. Intended for Node / server-side usage. We warn loudly if such a
10
+ * secret key is used in a browser, where it would be world-readable.
11
+ */
12
+
13
+ import type { SessionInit } from "./types";
14
+
15
/** Connection settings shared by the SDK's backend calls. */
export interface TransportConfig {
  /** Base URL of the backend API. */
  apiUrl: string;
  /** Secret API key (sk_…), sent as the `x-api-key` header. Leave unset for public embeds. */
  apiKey?: string;
  /** Custom fetch implementation (Node <18 / testing). Falls back to `globalThis.fetch`. */
  fetch?: typeof fetch;
}
22
+
23
/**
 * Return a copy of `base` with the auth header added.
 *
 * `x-api-key` is set to the trimmed `config.apiKey` only when that value is
 * non-empty; `base` itself is never mutated.
 */
export function buildHeaders(
  config: Pick<TransportConfig, "apiKey">,
  base: Record<string, string> = {},
): Record<string, string> {
  const trimmedKey = config.apiKey?.trim();
  return trimmedKey ? { ...base, "x-api-key": trimmedKey } : { ...base };
}
33
+
34
/**
 * Pick the fetch implementation to use.
 *
 * @param fetchImpl Caller-supplied fetch; returned as-is when given.
 * @returns `fetchImpl`, or the global fetch bound to `globalThis`.
 * @throws Error when neither a custom nor a global fetch exists.
 */
export function resolveFetch(fetchImpl?: typeof fetch): typeof fetch {
  if (fetchImpl) return fetchImpl;

  if (typeof globalThis.fetch !== "function") {
    throw new Error(
      "[voice-agent] No global fetch available. Pass `fetch` in the SDK config (Node < 18).",
    );
  }
  return globalThis.fetch.bind(globalThis);
}
44
+
45
/**
 * Emit a console warning when a secret key (one whose trimmed value starts
 * with `sk_`) is supplied while a global `window` exists, i.e. the key is
 * sitting in browser JS where anyone can read it. Intended to be called once
 * at init.
 */
export function warnIfBrowserSecret(apiKey?: string): void {
  if (!apiKey) return;
  if (!apiKey.trim().startsWith("sk_")) return;
  if (typeof window === "undefined") return;

  // eslint-disable-next-line no-console
  console.warn(
    "[voice-agent] A secret API key (sk_…) was provided in a browser context — " +
      "it is visible to anyone who views the page. For public embeds omit `apiKey` " +
      "and rely on origin allow-listing; reserve `apiKey` for server-side/Node usage.",
  );
}
63
+
64
/** Arguments accepted by `fetchSession`. */
export interface FetchSessionArgs {
  /** Base URL of the backend API. */
  apiUrl: string;
  /** Agent identifier, sent as `agent` in the request body. */
  agentSlug: string;
  /** Language, sent as `language` in the request body. */
  language: string;
  /** Optional API key, sent as `x-api-key`. */
  apiKey?: string;
  /** Optional custom fetch implementation. */
  fetch?: typeof fetch;
  /** Page URL recorded server-side; defaults to `window.location.href`. */
  originUrl?: string;
  /** Optional extra fields honored only with a valid x-api-key (system_prompt, etc.). */
  extra?: Record<string, unknown>;
}
75
+
76
/** Narrow an unknown JSON value to a plain key/value view, or `null` if it is not an object. */
function asRecord(value: unknown): Record<string, unknown> | null {
  return typeof value === "object" && value !== null
    ? (value as Record<string, unknown>)
    : null;
}

/**
 * Mint a LiveKit session token by POSTing to
 * `{apiUrl}/api/agents/agent-session/`.
 *
 * @param args Request settings; see `FetchSessionArgs`.
 * @returns The session payload: `body.data` when the response is wrapped in
 *   an envelope, otherwise the body itself. Only `livekit_url` and `token`
 *   are checked for presence.
 * @throws Error when no fetch is available, the response is not JSON, the
 *   HTTP status is not OK, the body carries `success: false`, or
 *   `livekit_url`/`token` is missing.
 */
export async function fetchSession(args: FetchSessionArgs): Promise<SessionInit> {
  const doFetch = resolveFetch(args.fetch);
  // The browser's Origin/Referer headers don't survive server-side relays.
  // Send the page URL explicitly so the backend can record where the session
  // was initiated regardless of the network path.
  const originUrl =
    args.originUrl ??
    ((typeof window !== "undefined" && window.location?.href) || "");

  // Tolerate a trailing slash on the base URL ("https://host/") so the
  // request does not go to "https://host//api/…".
  const baseUrl = args.apiUrl.replace(/\/+$/, "");

  const r = await doFetch(`${baseUrl}/api/agents/agent-session/`, {
    method: "POST",
    headers: buildHeaders(args, { "Content-Type": "application/json" }),
    body: JSON.stringify({
      agent: args.agentSlug,
      origin_url: originUrl,
      language: args.language,
      // NOTE: spread last, so keys in `extra` override the fields above.
      ...(args.extra ?? {}),
    }),
  });

  let body: unknown;
  try {
    body = await r.json();
  } catch {
    throw new Error(`session request failed (${r.status}): non-JSON response`);
  }

  const envelope = asRecord(body);
  if (!r.ok || envelope?.success === false) {
    // A structured (non-string) message is serialized instead of being
    // rendered as "[object Object]".
    const rawMsg = envelope?.message;
    const detail =
      typeof rawMsg === "string" ? rawMsg : rawMsg ? JSON.stringify(rawMsg) : "";
    throw new Error(`session request failed: ${detail || `HTTP ${r.status}`}`);
  }

  const payload: unknown = envelope?.data ?? body;
  const data = asRecord(payload);
  if (!data?.livekit_url || !data?.token) {
    throw new Error(
      `session response missing livekit_url/token (got keys: ${Object.keys(
        data ?? {},
      ).join(",")})`,
    );
  }
  return payload as SessionInit;
}
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Cross-cutting types shared between the headless core, the prebuilt UI, and
3
+ * external SDK consumers. Kept free of any DOM or LiveKit imports so this
4
+ * module is safe to import from anywhere (including Node).
5
+ */
6
+
7
/** The visual states the agent orb / call indicator can be in. */
export type OrbState = "idle" | "connecting" | "listening" | "thinking" | "speaking";
14
+
15
/**
 * Session payload as returned by Django's `POST /api/agents/agent-session/`.
 * Field names are snake_case because they come from that response.
 */
export interface SessionInit {
  // Connection credentials.
  token: string;
  livekit_url: string;

  // Room / participant identity.
  room_name: string;
  participant_identity: string;
  expires_in_seconds: number;
  session_id: string;

  // Agent configuration echoed back with the session.
  character_slug: string;
  system_prompt: string;
  greeting: string;
  voice_inference_url: string;
}
28
+
29
/** Coarse connection lifecycle phase, reported through the `connection` event. */
export type ConnectionPhase =
  | "connecting" | "connected"
  | "reconnecting"
  | "disconnected" | "failed";
36
+
37
/**
 * Tells the UI which call controls to enable. This replaces the direct
 * `refs.startBtn.disabled = …` side effects the controller used to perform.
 */
export interface ControlsState {
  /** Whether the start control should be enabled. */
  canStart: boolean;
  /** Whether the mute control should be enabled. */
  canMute: boolean;
  /** Whether the end control should be enabled. */
  canEnd: boolean;
}
46
+
47
/**
 * Overrides for where the DeepFilterNet3 assets are loaded from. Mirrors the
 * widget's boot config exactly, precedence included: `wasmUrl` / `onnxUrl`
 * win over `cdnUrl`. Supplied to the voice controller per instance (no
 * module-level globals) so several agents can coexist on one page.
 */
export interface DeepFilterUrls {
  /** Where the DeepFilterNet3 ESM module is loaded from. Empty = esm.sh default. */
  moduleUrl?: string;
  /** Base CDN URL; the package appends `/v2/pkg/df_bg.wasm` etc. Empty = default. */
  cdnUrl?: string;
  /** Direct URL to `df_bg.wasm`. Takes precedence over `cdnUrl`. */
  wasmUrl?: string;
  /** Direct URL to `DeepFilterNet3_onnx.tar.gz`. Takes precedence over `cdnUrl`. */
  onnxUrl?: string;
}
63
+
64
/** esm.sh mirror of the DeepFilterNet3 ESM module, used when no module URL is configured. */
export const DEFAULT_DEEPFILTER_MODULE_URL = "https://esm.sh/deepfilternet3-noise-filter@1.2.1";