@vox-ai/client 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,200 @@
1
+ # @vox-ai/client
2
+
3
+ 브라우저에서 vox.ai voice agent에 직접 연결하기 위한 JavaScript SDK.
4
+
5
+ ## 설치
6
+
7
+ ```bash
8
+ npm install @vox-ai/client
9
+ # or
10
+ yarn add @vox-ai/client
11
+ # or
12
+ pnpm add @vox-ai/client
13
+ ```
14
+
15
+ ## 빠른 시작
16
+
17
+ ```ts
18
+ import { Conversation } from "@vox-ai/client";
19
+
20
+ // 마이크 권한 요청 (UI에서 사전 안내 권장)
21
+ await navigator.mediaDevices.getUserMedia({ audio: true });
22
+
23
+ const conversation = await Conversation.startSession({
24
+ agentId: "YOUR_AGENT_ID",
25
+ apiKey: "YOUR_API_KEY",
26
+ onStatusChange: (status) => console.log("status:", status),
27
+ onModeChange: (mode) => console.log("mode:", mode),
28
+ onMessage: (message) => console.log(`${message.source}: ${message.text}`),
29
+ onError: (error) => console.error("error:", error.message),
30
+ });
31
+ ```
32
+
33
+ ## 세션 옵션
34
+
35
+ `Conversation.startSession(options)`에 전달하는 설정.
36
+
37
+ | 옵션 | 타입 | 필수 | 설명 |
38
+ |------|------|------|------|
39
+ | `agentId` | `string` | O | Agent ID |
40
+ | `apiKey` | `string` | O | API key |
41
+ | `agentVersion` | `string` | | Agent version (`"current"`, `"production"`, `"v1"` 등, default: `"current"`) |
42
+ | `textOnly` | `boolean` | | Text-only session (`true`면 mic/audio 없이 chat mode로 연결, default: `false`) |
43
+ | `dynamicVariables` | `Record<string, string \| number \| boolean>` | | Agent prompt에 주입할 dynamic variables |
44
+ | `metadata` | `Record<string, unknown>` | | Call metadata (webhook, call log에 포함) |
45
+
46
+ ## 콜백
47
+
48
+ | 콜백 | 시그니처 | 설명 |
49
+ |------|----------|------|
50
+ | `onConnect` | `() => void` | 연결 성공 |
51
+ | `onDisconnect` | `() => void` | 연결 종료 |
52
+ | `onStatusChange` | `(status: ConversationStatus) => void` | Status 변경 (`"disconnected"` → `"connecting"` → `"connected"`) |
53
+ | `onModeChange` | `(mode: ConversationMode) => void` | Mode 변경 (`"listening"` ⇄ `"speaking"`) |
54
+ | `onMessage` | `(message: ConversationMessage) => void` | 메시지 수신 (user transcription, agent response) |
55
+ | `onError` | `(error: Error) => void` | 에러 발생 |
56
+
57
+ ## 메서드
58
+
59
+ ### 세션 제어
60
+
61
+ ```ts
62
+ // 세션 종료
63
+ await conversation.endSession();
64
+
65
+ // 세션 ID 조회
66
+ const id = conversation.getId();
67
+
68
+ // 현재 세션 메시지 조회
69
+ const messages = conversation.getMessages();
70
+ ```
71
+
72
+ ### 메시지 전송
73
+
74
+ ```ts
75
+ // 텍스트 메시지 전송 (음성 대신 텍스트 입력)
76
+ await conversation.sendUserMessage("안녕하세요");
77
+ ```
78
+
79
+ ### 메시지 히스토리
80
+
81
+ ```ts
82
+ const messages = conversation.getMessages();
83
+
84
+ messages.forEach((message) => {
85
+ console.log(message.source, message.text, message.isFinal);
86
+ });
87
+ ```
88
+
89
+ - `getMessages()`는 현재 세션에서 주고받은 메시지를 timestamp 순으로 반환
90
+ - 동일 `id`의 streaming update는 최신 메시지로 반영
91
+
92
+ ### Text Only
93
+
94
+ ```ts
95
+ const conversation = await Conversation.startSession({
96
+ agentId: "YOUR_AGENT_ID",
97
+ apiKey: "YOUR_API_KEY",
98
+ textOnly: true,
99
+ });
100
+
101
+ await conversation.sendUserMessage("텍스트로만 대화할게요");
102
+ ```
103
+
104
+ - text-only session은 microphone 권한을 요청하지 않음
105
+ - 에이전트 응답은 LiveKit text stream으로 수신됨
106
+ - audio 전용 API는 안전한 no-op 또는 zero-value를 반환
107
+
108
+ ### 마이크 제어
109
+
110
+ ```ts
111
+ // 음소거
112
+ await conversation.setMicMuted(true);
113
+
114
+ // 음소거 해제
115
+ await conversation.setMicMuted(false);
116
+
117
+ // 현재 음소거 상태 조회
118
+ const isMuted = conversation.getMicMuted();
119
+ ```
120
+
121
+ ### 볼륨 제어
122
+
123
+ ```ts
124
+ // Agent 음성 볼륨 설정 (0.0 ~ 1.0)
125
+ conversation.setVolume({ volume: 0.5 });
126
+ ```
127
+
128
+ ### 상태 조회
129
+
130
+ ```ts
131
+ const status = conversation.getStatus(); // "disconnected" | "connecting" | "connected"
132
+ const mode = conversation.getMode(); // "listening" | "speaking"
133
+ ```
134
+
135
+ ### 오디오 모니터링
136
+
137
+ ```ts
138
+ // 입출력 볼륨 (0.0 ~ 1.0)
139
+ const inputVol = conversation.getInputVolume();
140
+ const outputVol = conversation.getOutputVolume();
141
+
142
+ // Frequency data (Uint8Array, 시각화용)
143
+ const inputFreq = conversation.getInputByteFrequencyData();
144
+ const outputFreq = conversation.getOutputByteFrequencyData();
145
+ ```
146
+
147
+ ### 디바이스 전환
148
+
149
+ ```ts
150
+ // 입력 디바이스 변경
151
+ await conversation.changeInputDevice({ inputDeviceId: "device-id" });
152
+
153
+ // 출력 디바이스 변경
154
+ await conversation.changeOutputDevice({ outputDeviceId: "device-id" });
155
+ ```
156
+
157
+ 디바이스 목록은 [`navigator.mediaDevices.enumerateDevices()`](https://developer.mozilla.org/docs/Web/API/MediaDevices/enumerateDevices)로 조회.
158
+
159
+ ## Dynamic Variables / Metadata
160
+
161
+ ```ts
162
+ const conversation = await Conversation.startSession({
163
+ agentId: "YOUR_AGENT_ID",
164
+ apiKey: "YOUR_API_KEY",
165
+ agentVersion: "production",
166
+ dynamicVariables: {
167
+ userName: "홍길동",
168
+ userType: "premium",
169
+ accountBalance: 50000,
170
+ },
171
+ metadata: {
172
+ sessionId: "sess_abc123",
173
+ source: "mobile-app",
174
+ },
175
+ });
176
+ ```
177
+
178
+ - `dynamicVariables` — Agent prompt에서 `{{userName}}` 형식으로 참조
179
+ - `metadata` — Outbound webhook과 call log에 포함
180
+
181
+ ## Export 타입
182
+
183
+ ```ts
184
+ import type {
185
+ ConversationMessage,
186
+ ConversationMode,
187
+ ConversationSource,
188
+ ConversationStatus,
189
+ InputDeviceConfig,
190
+ OutputDeviceConfig,
191
+ SetVolumeParams,
192
+ StartSessionOptions,
193
+ } from "@vox-ai/client";
194
+ ```
195
+
196
+ ## 참고
197
+
198
+ - 인증은 `apiKey` 직접 전달 방식 (`apiKey`가 브라우저에서 `Authorization: Bearer` 헤더로 전송되므로 키 노출에 유의)
199
+ - 내부 연결은 LiveKit WebRTC 기반
200
+ - 브라우저별 audio device 제약이 있을 수 있음
@@ -0,0 +1,53 @@
1
+ import type { ConversationMessage, ConversationMode, ConversationStatus, InputDeviceConfig, OutputDeviceConfig, SetVolumeParams, StartSessionOptions } from "./types";
2
+ export declare class Conversation { // Browser session with a vox.ai agent carried over a LiveKit room; create via startSession().
3
+ private readonly options; // Options object given to startSession (ids, api key, callbacks).
4
+ private readonly room; // LiveKit Room instance created in the constructor.
5
+ private connectionDetails; // JSON returned by the token endpoint; null until fetched.
6
+ private status; // Current ConversationStatus; starts as "disconnected".
7
+ private mode; // Current ConversationMode; starts as "listening".
8
+ private readonly messages; // Map keyed by message id, so a later update with the same id replaces the earlier one.
9
+ private readonly remoteAudioElements; // Audio elements attached for remote (agent) tracks.
10
+ private audioContext; // Lazily created Web Audio context used by the analysers.
11
+ private inputAnalyser;
12
+ private outputAnalyser;
13
+ private inputSourceNode;
14
+ private outputSourceNode;
15
+ private inputMonitorGain; // Zero-gain node between the input analyser and the context destination.
16
+ private outputMonitorGain; // Zero-gain node between the output analyser and the context destination (null when analysing an element).
17
+ private readonly inputFrequencyData; // Reused 128-bin buffer for microphone frequency data.
18
+ private readonly outputFrequencyData; // Reused 128-bin buffer for agent-audio frequency data.
19
+ private isMicMuted; // True until the microphone has been enabled successfully.
20
+ private constructor(); // Not constructible by callers; use the static factory below.
21
+ static startSession(options: StartSessionOptions): Promise<Conversation>; // Creates an instance, connects it, and resolves with it; rejects with an Error when startup fails.
22
+ private start;
23
+ private fetchConnectionDetails; // POSTs agent id/version, mode and variables to the token endpoint with the api key as Bearer token.
24
+ private bindRoomEvents;
25
+ private handleTextOnlyTranscription; // Consumes one text stream and publishes interim, then final, messages.
26
+ private attachRemoteAudioTrack;
27
+ private getOrCreateAudioContext;
28
+ private setupInputAnalyser;
29
+ private setupOutputAnalyser;
30
+ private setupOutputAnalyserFromElement;
31
+ private setupInputAnalyserFromLocalMicTrack;
32
+ private pushMessage; // Stores the message by id and invokes onMessage.
33
+ private updateStatus; // Invokes onStatusChange only when the value actually changes.
34
+ private updateMode; // Invokes onModeChange only when the value actually changes.
35
+ private handleError; // Wraps non-Error values in an Error before invoking onError.
36
+ private cleanup;
37
+ endSession(): Promise<void>; // Disconnects the room and releases audio resources; does nothing when already disconnected.
38
+ getId(): string | undefined; // roomName from the connection details, or undefined before they are fetched.
39
+ getMessages(): ConversationMessage[]; // New array of the session's messages sorted by ascending timestamp.
40
+ setVolume({ volume }: SetVolumeParams): void; // Clamps volume to [0, 1] and applies it to attached agent audio elements; no-op in text-only sessions.
41
+ setMicMuted(isMuted: boolean): Promise<void>; // Enables/disables the microphone; text-only sessions always stay muted. Rejects (and restores the previous flag) if the change fails.
42
+ sendUserMessage(text: string): Promise<void>; // Sends the trimmed text to the agent; blank input is ignored.
43
+ changeInputDevice(config: InputDeviceConfig): Promise<boolean>; // Switches the active "audioinput" device; resolves false in text-only sessions.
44
+ changeOutputDevice(config: OutputDeviceConfig): Promise<boolean>; // Switches the active "audiooutput" device; resolves false in text-only sessions.
45
+ getInputByteFrequencyData(): Uint8Array; // Copy of the current microphone frequency bins; empty array when no input analyser exists.
46
+ getOutputByteFrequencyData(): Uint8Array; // Copy of the current agent-audio frequency bins; empty array when no output analyser exists.
47
+ getInputVolume(): number; // Largest microphone frequency bin divided by 255; 0 when no data is available.
48
+ getOutputVolume(): number; // Largest agent-audio frequency bin divided by 255; 0 when no data is available.
49
+ getStatus(): ConversationStatus;
50
+ getMode(): ConversationMode;
51
+ getMicMuted(): boolean;
52
+ private isTextOnlySession; // True only when options.textOnly === true.
53
+ }
@@ -0,0 +1,469 @@
1
+ import { ConnectionState, Room, RoomEvent, Track, } from "livekit-client";
2
+ const LIVEKIT_TOKEN_ENDPOINT = "https://api.tryvox.co/functions/v1/livekit-token";
3
+ const LIVEKIT_CHAT_TOPIC = "lk.chat";
4
+ const LIVEKIT_LEGACY_CHAT_TOPIC = "lk-chat-topic";
5
+ const LIVEKIT_TRANSCRIPTION_TOPIC = "lk.transcription";
6
/**
 * Browser-side session with a vox.ai agent, carried over a LiveKit room.
 *
 * Create instances with {@link Conversation.startSession}. A session is either
 * a voice call (microphone plus remote audio) or, when `options.textOnly` is
 * `true`, a chat that only exchanges text streams. In text-only sessions the
 * audio-related methods are no-ops or return zero values.
 */
export class Conversation {
    /** Options given to `startSession` (agent id, api key, callbacks, ...). */
    options;
    /** LiveKit room carrying audio, chat and transcription traffic. */
    room;
    /** JSON returned by the token endpoint; `null` until it has been fetched. */
    connectionDetails = null;
    status = "disconnected";
    mode = "listening";
    /** Messages keyed by id so a streaming update replaces its earlier revision. */
    messages = new Map();
    /** Audio elements created for remote (agent) audio tracks. */
    remoteAudioElements = new Set();
    audioContext = null;
    inputAnalyser = null;
    outputAnalyser = null;
    inputSourceNode = null;
    outputSourceNode = null;
    inputMonitorGain = null;
    outputMonitorGain = null;
    inputFrequencyData = new Uint8Array(128);
    outputFrequencyData = new Uint8Array(128);
    isMicMuted = true;
    /** Last volume accepted by `setVolume`; re-applied to elements attached later. */
    outputVolume = 1;

    /**
     * @param options Session options; see `StartSessionOptions`.
     */
    constructor(options) {
        this.options = options;
        this.room = new Room();
    }

    /**
     * Creates a conversation and connects it.
     *
     * @param options Session options; see `StartSessionOptions`.
     * @returns The connected conversation.
     * @throws {Error} When the token request or the room connection fails.
     */
    static async startSession(options) {
        const conversation = new Conversation(options);
        await conversation.start();
        return conversation;
    }

    /**
     * Fetches connection details, connects the room and, for voice sessions,
     * enables the microphone. A microphone failure is reported through
     * `onError` but does not abort the session.
     *
     * @throws {Error} When startup fails; the room is released before rethrowing.
     */
    async start() {
        this.updateStatus("connecting");
        try {
            const connectionDetails = await this.fetchConnectionDetails();
            this.connectionDetails = connectionDetails;
            this.bindRoomEvents();
            await this.room.connect(connectionDetails.serverUrl, connectionDetails.participantToken);
            if (this.isTextOnlySession()) {
                this.isMicMuted = true;
            }
            else {
                // Voice sessions start with the microphone enabled by default.
                try {
                    await this.room.localParticipant.setMicrophoneEnabled(true);
                    this.isMicMuted = false;
                    this.setupInputAnalyserFromLocalMicTrack();
                }
                catch (error) {
                    this.isMicMuted = true;
                    this.handleError(error instanceof Error
                        ? error
                        : new Error("Failed to enable microphone"));
                }
            }
            this.updateStatus("connected");
            this.options.onConnect?.();
        }
        catch (error) {
            // Do not leave a half-started session behind: the room may already be
            // connected (e.g. a user callback threw after connect succeeded).
            try {
                await this.room.disconnect();
            }
            catch {
                // Best effort: the original startup error is the one worth reporting.
            }
            this.cleanup();
            this.updateStatus("disconnected");
            this.handleError(error);
            throw error instanceof Error ? error : new Error(String(error));
        }
    }

    /**
     * Requests LiveKit connection details from the vox.ai token endpoint.
     *
     * @returns The parsed response; `serverUrl` and `participantToken` are
     *   verified to be strings, other fields (e.g. `roomName`) are passed through.
     * @throws {Error} On a non-2xx response or a response without the required fields.
     */
    async fetchConnectionDetails() {
        // NOTE(review): the source is reported as "react-sdk" although this package is
        // @vox-ai/client; confirm which value the backend expects before changing it.
        const sourceType = "react-sdk";
        const sourceVersion = "0.1.0";
        const mode = this.isTextOnlySession() ? "chat" : "call";
        const dynamicVariables = this.options.dynamicVariables ?? {};
        const metadata = this.options.metadata ?? {};
        const body = {
            agent_id: this.options.agentId,
            agent_version: this.options.agentVersion ?? "current",
            mode,
            dynamic_variables: dynamicVariables,
            metadata: {
                runtime_context: {
                    source: {
                        type: sourceType,
                        version: sourceVersion,
                    },
                    mode,
                },
                call_web: {
                    dynamic_variables: dynamicVariables,
                    metadata,
                },
            },
        };
        const response = await fetch(LIVEKIT_TOKEN_ENDPOINT, {
            method: "POST",
            headers: {
                Authorization: `Bearer ${this.options.apiKey}`,
                "Content-Type": "application/json",
            },
            body: JSON.stringify(body),
        });
        if (!response.ok) {
            const text = await response.text();
            throw new Error(`Session initialization failed (${response.status}): ${text}`);
        }
        const details = await response.json();
        // Fail here with a clear message instead of deep inside room.connect().
        if (details === null ||
            typeof details !== "object" ||
            typeof details.serverUrl !== "string" ||
            typeof details.participantToken !== "string") {
            throw new Error("Session initialization failed: malformed connection details");
        }
        return details;
    }

    /**
     * Subscribes to the room events that drive status, mode, messages and
     * audio attachment. Called once, before connecting.
     */
    bindRoomEvents() {
        if (this.isTextOnlySession()) {
            const handleTextOnlyTranscription = (reader, participantInfo) => {
                // Fire and forget: errors are reported through onError inside the handler.
                void this.handleTextOnlyTranscription(reader, participantInfo.identity);
            };
            this.room.registerTextStreamHandler(LIVEKIT_TRANSCRIPTION_TOPIC, handleTextOnlyTranscription);
        }
        this.room.on(RoomEvent.ConnectionStateChanged, (state) => {
            if (state === ConnectionState.Connected) {
                this.updateStatus("connected");
            }
            if (state === ConnectionState.Disconnected) {
                this.updateStatus("disconnected");
            }
        });
        this.room.on(RoomEvent.Disconnected, () => {
            this.cleanup();
            this.updateStatus("disconnected");
            this.options.onDisconnect?.();
        });
        this.room.on(RoomEvent.ActiveSpeakersChanged, (speakers) => {
            if (this.isTextOnlySession())
                return;
            const hasRemoteSpeaker = speakers.some((speaker) => !speaker.isLocal);
            this.updateMode(hasRemoteSpeaker ? "speaking" : "listening");
        });
        this.room.on(RoomEvent.ChatMessage, (message, participant) => {
            this.pushMessage({
                id: message.id,
                source: participant?.isLocal ? "user" : "agent",
                text: message.message,
                timestamp: message.timestamp,
                isFinal: true,
            });
        });
        this.room.on(RoomEvent.TranscriptionReceived, (segments, participant) => {
            if (!segments)
                return;
            const source = participant?.isLocal ? "user" : "agent";
            const receivedAt = Date.now();
            for (const segment of segments) {
                // Keep the timestamp of the first revision so interim updates do not
                // move the message around in the timestamp-ordered history.
                const firstSeenAt = this.messages.get(segment.id)?.timestamp ?? receivedAt;
                this.pushMessage({
                    id: segment.id,
                    source,
                    text: segment.text,
                    timestamp: firstSeenAt,
                    isFinal: Boolean(segment.final),
                });
            }
        });
        this.room.on(RoomEvent.LocalTrackPublished, (publication) => {
            if (publication.source !== Track.Source.Microphone)
                return;
            const track = publication.track;
            if (!track || !track.mediaStreamTrack)
                return;
            this.setupInputAnalyser(track.mediaStreamTrack);
        });
        this.room.on(RoomEvent.TrackSubscribed, (track, _publication, participant) => {
            if (this.isTextOnlySession())
                return;
            if (track.kind !== Track.Kind.Audio || participant.isLocal)
                return;
            const remoteTrack = track;
            if (!this.outputAnalyser && remoteTrack.mediaStreamTrack) {
                this.setupOutputAnalyser(remoteTrack.mediaStreamTrack);
            }
            this.attachRemoteAudioTrack(remoteTrack);
        });
        this.room.on(RoomEvent.TrackUnsubscribed, (track) => {
            if (track.kind !== Track.Kind.Audio)
                return;
            for (const element of track.detach()) {
                this.remoteAudioElements.delete(element);
                if (element.parentNode)
                    element.parentNode.removeChild(element);
            }
        });
    }

    /**
     * Consumes one transcription text stream in a text-only session and
     * publishes it as interim messages followed by a final one.
     *
     * @param reader Text stream reader supplied by LiveKit.
     * @param participantIdentity Identity of the stream's sender; compared with
     *   the local identity to decide between "user" and "agent".
     */
    async handleTextOnlyTranscription(reader, participantIdentity) {
        const source = participantIdentity === this.room.localParticipant.identity
            ? "user"
            : "agent";
        const messageId = reader.info.attributes?.["lk.segment_id"] ?? reader.info.id;
        let latestText = "";
        if (source === "agent") {
            this.updateMode("speaking");
        }
        try {
            for await (const text of reader) {
                latestText = text;
                this.pushMessage({
                    id: messageId,
                    source,
                    text,
                    timestamp: reader.info.timestamp,
                    isFinal: false,
                });
            }
            if (latestText) {
                this.pushMessage({
                    id: messageId,
                    source,
                    text: latestText,
                    timestamp: reader.info.timestamp,
                    isFinal: true,
                });
            }
        }
        catch (error) {
            this.handleError(error);
        }
        finally {
            if (source === "agent") {
                this.updateMode("listening");
            }
        }
    }

    /**
     * Attaches a remote audio track to hidden, autoplaying audio elements in
     * the document body and applies the current output volume to them.
     *
     * @param track Remote audio track to play.
     */
    attachRemoteAudioTrack(track) {
        const attached = track.attach();
        const elements = Array.isArray(attached) ? attached : [attached];
        for (const element of elements) {
            if (!(element instanceof HTMLAudioElement))
                continue;
            element.autoplay = true;
            element.style.display = "none";
            // Honour a volume chosen before this element existed.
            element.volume = this.outputVolume;
            document.body.appendChild(element);
            this.remoteAudioElements.add(element);
            if (!this.outputAnalyser) {
                this.setupOutputAnalyserFromElement(element);
            }
        }
    }

    /**
     * Returns the shared AudioContext, creating one when none exists or the
     * previous one was closed, and asks a suspended context to resume.
     */
    getOrCreateAudioContext() {
        if (!this.audioContext || this.audioContext.state === "closed") {
            this.audioContext = new AudioContext();
        }
        if (this.audioContext.state === "suspended") {
            // Resuming can be refused (e.g. without a user gesture); analysis is optional.
            this.audioContext.resume().catch(() => { });
        }
        return this.audioContext;
    }

    /**
     * Builds `track -> analyser -> zero-gain -> destination`.
     * The zero-gain sink connects the analyser to the destination without making
     * the analysed audio audible (presumably so the graph keeps being processed).
     *
     * @param track MediaStreamTrack to analyse.
     * @returns The created nodes.
     */
    createTrackAnalyser(track) {
        const context = this.getOrCreateAudioContext();
        const source = context.createMediaStreamSource(new MediaStream([track]));
        const analyser = context.createAnalyser();
        analyser.fftSize = 256;
        source.connect(analyser);
        const silentSink = context.createGain();
        silentSink.gain.value = 0;
        analyser.connect(silentSink);
        silentSink.connect(context.destination);
        return { source, analyser, silentSink };
    }

    /** Disconnects the given audio nodes, skipping missing ones. */
    disconnectNodes(...nodes) {
        for (const node of nodes) {
            if (!node)
                continue;
            try {
                node.disconnect();
            }
            catch {
                // Best effort: the node may belong to an already closed context.
            }
        }
    }

    /** Disconnects and forgets the microphone analysis graph. */
    teardownInputAnalyser() {
        this.disconnectNodes(this.inputSourceNode, this.inputAnalyser, this.inputMonitorGain);
        this.inputSourceNode = null;
        this.inputAnalyser = null;
        this.inputMonitorGain = null;
    }

    /** Disconnects and forgets the agent-audio analysis graph. */
    teardownOutputAnalyser() {
        this.disconnectNodes(this.outputSourceNode, this.outputAnalyser, this.outputMonitorGain);
        this.outputSourceNode = null;
        this.outputAnalyser = null;
        this.outputMonitorGain = null;
    }

    /**
     * (Re)creates the microphone analyser for `track`. This runs on every track
     * publication and every unmute, so the previous graph is released first.
     *
     * @param track Local microphone MediaStreamTrack.
     */
    setupInputAnalyser(track) {
        this.teardownInputAnalyser();
        const { source, analyser, silentSink } = this.createTrackAnalyser(track);
        this.inputSourceNode = source;
        this.inputMonitorGain = silentSink;
        this.inputAnalyser = analyser;
    }

    /**
     * (Re)creates the agent-audio analyser for `track`, releasing any previous graph.
     *
     * @param track Remote audio MediaStreamTrack.
     */
    setupOutputAnalyser(track) {
        this.teardownOutputAnalyser();
        const { source, analyser, silentSink } = this.createTrackAnalyser(track);
        this.outputSourceNode = source;
        this.outputMonitorGain = silentSink;
        this.outputAnalyser = analyser;
    }

    /**
     * Creates the agent-audio analyser from an audio element. The element's
     * source node is also connected to the destination, since this path has no
     * silent sink and the source node is what feeds the speakers here.
     *
     * @param element Audio element playing the remote track.
     */
    setupOutputAnalyserFromElement(element) {
        this.teardownOutputAnalyser();
        const context = this.getOrCreateAudioContext();
        const source = context.createMediaElementSource(element);
        const analyser = context.createAnalyser();
        analyser.fftSize = 256;
        source.connect(analyser);
        source.connect(context.destination);
        this.outputSourceNode = source;
        this.outputMonitorGain = null;
        this.outputAnalyser = analyser;
    }

    /** Sets up the input analyser from the currently published microphone track, if any. */
    setupInputAnalyserFromLocalMicTrack() {
        const publication = this.room.localParticipant.getTrackPublication(Track.Source.Microphone);
        const track = publication?.track;
        if (!track || !track.mediaStreamTrack)
            return;
        this.setupInputAnalyser(track.mediaStreamTrack);
    }

    /**
     * Stores `message` under its id (replacing an earlier revision) and
     * notifies `onMessage`.
     */
    pushMessage(message) {
        this.messages.set(message.id, message);
        this.options.onMessage?.(message);
    }

    /** Updates the status and notifies `onStatusChange` only on an actual change. */
    updateStatus(status) {
        if (this.status === status)
            return;
        this.status = status;
        this.options.onStatusChange?.(status);
    }

    /** Updates the mode and notifies `onModeChange` only on an actual change. */
    updateMode(mode) {
        if (this.mode === mode)
            return;
        this.mode = mode;
        this.options.onModeChange?.(mode);
    }

    /** Reports `error` through `onError`, wrapping non-Error values in an Error. */
    handleError(error) {
        const safeError = error instanceof Error ? error : new Error(String(error));
        this.options.onError?.(safeError);
    }

    /**
     * Releases everything the session created outside the room: the text
     * stream handler, remote audio elements, analyser graphs and the audio
     * context. Safe to call more than once.
     */
    cleanup() {
        this.room.unregisterTextStreamHandler(LIVEKIT_TRANSCRIPTION_TOPIC);
        for (const element of this.remoteAudioElements) {
            element.remove();
        }
        this.remoteAudioElements.clear();
        this.teardownInputAnalyser();
        this.teardownOutputAnalyser();
        if (this.audioContext && this.audioContext.state !== "closed") {
            this.audioContext.close().catch(() => { });
        }
        this.audioContext = null;
    }

    /**
     * Ends the session. Resolves after the room has disconnected and local
     * resources are released; does nothing when already disconnected.
     */
    async endSession() {
        if (this.status === "disconnected")
            return;
        try {
            // Await so callers can rely on the room being gone once this resolves.
            await this.room.disconnect();
        }
        finally {
            this.cleanup();
            this.updateStatus("disconnected");
        }
    }

    /** @returns `roomName` from the connection details, or `undefined` before they are fetched. */
    getId() {
        return this.connectionDetails?.roomName;
    }

    /** @returns A new array with the session's messages in ascending timestamp order. */
    getMessages() {
        return Array.from(this.messages.values()).sort((a, b) => a.timestamp - b.timestamp);
    }

    /**
     * Sets the agent audio volume. The value is clamped to [0, 1], applied to
     * all attached audio elements and remembered for elements attached later.
     * A NaN volume is ignored. No-op in text-only sessions.
     *
     * @param params Object with the requested `volume`.
     */
    setVolume({ volume }) {
        if (this.isTextOnlySession())
            return;
        const normalized = Math.max(0, Math.min(1, volume));
        // Assigning NaN to HTMLMediaElement.volume throws; keep the previous volume.
        if (Number.isNaN(normalized))
            return;
        this.outputVolume = normalized;
        for (const element of this.remoteAudioElements) {
            element.volume = normalized;
        }
    }

    /**
     * Mutes or unmutes the microphone. Text-only sessions always stay muted.
     *
     * @param isMuted `true` to mute, `false` to unmute.
     * @throws Rethrows the LiveKit error after restoring the previous mute flag.
     */
    async setMicMuted(isMuted) {
        if (this.isTextOnlySession()) {
            this.isMicMuted = true;
            return;
        }
        const prev = this.isMicMuted;
        this.isMicMuted = isMuted;
        try {
            await this.room.localParticipant.setMicrophoneEnabled(!isMuted);
            if (!isMuted) {
                this.setupInputAnalyserFromLocalMicTrack();
            }
        }
        catch (error) {
            this.isMicMuted = prev;
            throw error;
        }
    }

    /**
     * Sends a text message from the user to the agent. Surrounding whitespace
     * is trimmed and blank input is ignored.
     *
     * @param text Message text.
     */
    async sendUserMessage(text) {
        const trimmed = text.trim();
        if (!trimmed)
            return;
        const timestamp = Date.now();
        const messageId = typeof crypto !== "undefined" && typeof crypto.randomUUID === "function"
            ? crypto.randomUUID()
            : `user-${timestamp}`;
        const localEcho = {
            id: messageId,
            source: "user",
            text: trimmed,
            timestamp,
            isFinal: true,
        };
        const participant = this.room.localParticipant;
        // Primary transport: text stream on lk.chat.
        if (typeof participant.sendText === "function") {
            await participant.sendText(trimmed, {
                topic: LIVEKIT_CHAT_TOPIC,
            });
            // Compatibility fallback for legacy chat topic consumers.
            try {
                const payload = new TextEncoder().encode(JSON.stringify({
                    id: messageId,
                    timestamp,
                    message: trimmed,
                    ignoreLegacy: true,
                }));
                await participant.publishData(payload, {
                    reliable: true,
                    topic: LIVEKIT_LEGACY_CHAT_TOPIC,
                });
            }
            catch {
                // Non-fatal: stream text path is the primary transport.
            }
            this.pushMessage(localEcho);
            return;
        }
        if (typeof participant.sendChatMessage === "function") {
            // No local echo on this path (presumably the ChatMessage room event reports it).
            await participant.sendChatMessage(trimmed);
            return;
        }
        // Last-resort local echo when text send APIs are unavailable.
        this.pushMessage(localEcho);
    }

    /**
     * Switches the microphone device.
     *
     * @param config Object with the target `inputDeviceId`.
     * @returns LiveKit's result, or `false` in text-only sessions.
     */
    async changeInputDevice(config) {
        if (this.isTextOnlySession())
            return false;
        return this.room.switchActiveDevice("audioinput", config.inputDeviceId);
    }

    /**
     * Switches the audio output device.
     *
     * @param config Object with the target `outputDeviceId`.
     * @returns LiveKit's result, or `false` in text-only sessions.
     */
    async changeOutputDevice(config) {
        if (this.isTextOnlySession())
            return false;
        return this.room.switchActiveDevice("audiooutput", config.outputDeviceId);
    }

    /** @returns A copy of the current microphone frequency bins, or an empty array without an analyser. */
    getInputByteFrequencyData() {
        if (!this.inputAnalyser)
            return new Uint8Array();
        this.inputAnalyser.getByteFrequencyData(this.inputFrequencyData);
        return new Uint8Array(this.inputFrequencyData);
    }

    /** @returns A copy of the current agent-audio frequency bins, or an empty array without an analyser. */
    getOutputByteFrequencyData() {
        if (!this.outputAnalyser)
            return new Uint8Array();
        this.outputAnalyser.getByteFrequencyData(this.outputFrequencyData);
        return new Uint8Array(this.outputFrequencyData);
    }

    /**
     * @param data Byte frequency bins (0-255 each).
     * @returns The largest bin divided by 255, or 0 for empty data.
     */
    peakLevel(data) {
        let peak = 0;
        for (const bin of data) {
            if (bin > peak)
                peak = bin;
        }
        return peak / 255;
    }

    /** @returns Microphone level in [0, 1]; 0 when no analyser exists. */
    getInputVolume() {
        return this.peakLevel(this.getInputByteFrequencyData());
    }

    /** @returns Agent audio level in [0, 1]; 0 when no analyser exists. */
    getOutputVolume() {
        return this.peakLevel(this.getOutputByteFrequencyData());
    }

    getStatus() {
        return this.status;
    }

    getMode() {
        return this.mode;
    }

    getMicMuted() {
        return this.isMicMuted;
    }

    /** @returns `true` only when the session was started with `textOnly: true`. */
    isTextOnlySession() {
        return this.options.textOnly === true;
    }
}
@@ -0,0 +1,2 @@
1
+ export { Conversation } from "./conversation";
2
+ export type { ConversationMessage, ConversationMode, ConversationSource, ConversationStatus, InputDeviceConfig, OutputDeviceConfig, SetVolumeParams, StartSessionOptions, } from "./types";
package/dist/index.js ADDED
@@ -0,0 +1 @@
1
+ export { Conversation } from "./conversation";
@@ -0,0 +1,33 @@
1
+ export type ConversationStatus = "disconnected" | "connecting" | "connected"; // Connection lifecycle state, reported via onStatusChange and getStatus().
2
+ export type ConversationMode = "listening" | "speaking"; // "speaking" while the agent is talking or streaming text, otherwise "listening".
3
+ export type ConversationSource = "agent" | "user" | "system"; // Author of a message.
4
+ export type ConversationMessage = { // One chat or transcription entry delivered to onMessage and returned by getMessages().
5
+ id: string; // Updates that reuse an id replace the previously stored message.
6
+ source: ConversationSource;
7
+ text: string;
8
+ timestamp: number; // Sort key used by getMessages(); Date.now() for messages created locally.
9
+ isFinal: boolean; // false for interim streaming updates, true for the final text.
10
+ };
11
+ export type StartSessionOptions = { // Options accepted by Conversation.startSession().
12
+ agentId: string; // Sent as agent_id in the token request.
13
+ apiKey: string; // Sent as the Bearer token of the token request.
14
+ agentVersion?: string; // Sent as agent_version; "current" when omitted.
15
+ textOnly?: boolean; // true requests mode "chat" and skips microphone/audio setup; otherwise mode is "call".
16
+ dynamicVariables?: Record<string, string | number | boolean>; // Sent as dynamic_variables; {} when omitted.
17
+ metadata?: Record<string, unknown>; // Sent under metadata.call_web.metadata; {} when omitted.
18
+ onConnect?: () => void; // Called once startup has finished successfully.
19
+ onDisconnect?: () => void; // Called when the room reports it has disconnected.
20
+ onError?: (error: Error) => void; // Receives startup, microphone and text-stream errors (non-Error values are wrapped).
21
+ onMessage?: (message: ConversationMessage) => void; // Called for every stored message, including interim updates.
22
+ onStatusChange?: (status: ConversationStatus) => void; // Called only when the status value changes.
23
+ onModeChange?: (mode: ConversationMode) => void; // Called only when the mode value changes.
24
+ };
25
+ export type SetVolumeParams = { // Argument of Conversation.setVolume().
26
+ volume: number; // Clamped to the range [0, 1] before being applied.
27
+ };
28
+ export type InputDeviceConfig = { // Argument of Conversation.changeInputDevice().
29
+ inputDeviceId: string; // Device id passed to the room's "audioinput" device switch.
30
+ };
31
+ export type OutputDeviceConfig = { // Argument of Conversation.changeOutputDevice().
32
+ outputDeviceId: string; // Device id passed to the room's "audiooutput" device switch.
33
+ };
package/dist/types.js ADDED
@@ -0,0 +1 @@
1
+ export {};
package/package.json ADDED
@@ -0,0 +1,30 @@
1
+ {
2
+ "name": "@vox-ai/client",
3
+ "version": "0.1.0",
4
+ "description": "vox.ai JavaScript SDK",
5
+ "type": "module",
6
+ "main": "./dist/index.js",
7
+ "types": "./dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.js"
12
+ }
13
+ },
14
+ "files": [
15
+ "dist"
16
+ ],
17
+ "scripts": {
18
+ "build": "tsc -p tsconfig.json",
19
+ "clean": "rm -rf dist",
20
+ "typecheck": "tsc -p tsconfig.json --noEmit",
21
+ "prepublishOnly": "npm run build"
22
+ },
23
+ "dependencies": {
24
+ "livekit-client": "^2.10.0"
25
+ },
26
+ "devDependencies": {
27
+ "typescript": "^5.5.4"
28
+ },
29
+ "license": "MIT"
30
+ }