@convbased/sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +235 -0
  3. package/dist/cjs/client.js +635 -0
  4. package/dist/cjs/client.js.map +1 -0
  5. package/dist/cjs/endpoints.js +10 -0
  6. package/dist/cjs/endpoints.js.map +1 -0
  7. package/dist/cjs/events.js +39 -0
  8. package/dist/cjs/events.js.map +1 -0
  9. package/dist/cjs/graphql.js +40 -0
  10. package/dist/cjs/graphql.js.map +1 -0
  11. package/dist/cjs/index.js +24 -0
  12. package/dist/cjs/index.js.map +1 -0
  13. package/dist/cjs/package.json +3 -0
  14. package/dist/cjs/rtcServers.js +35 -0
  15. package/dist/cjs/rtcServers.js.map +1 -0
  16. package/dist/cjs/sdp.js +37 -0
  17. package/dist/cjs/sdp.js.map +1 -0
  18. package/dist/cjs/signaling.js +146 -0
  19. package/dist/cjs/signaling.js.map +1 -0
  20. package/dist/cjs/tts.js +227 -0
  21. package/dist/cjs/tts.js.map +1 -0
  22. package/dist/cjs/types.js +26 -0
  23. package/dist/cjs/types.js.map +1 -0
  24. package/dist/cjs/upload.js +87 -0
  25. package/dist/cjs/upload.js.map +1 -0
  26. package/dist/client.d.ts +169 -0
  27. package/dist/client.d.ts.map +1 -0
  28. package/dist/client.js +631 -0
  29. package/dist/client.js.map +1 -0
  30. package/dist/convbased-sdk.global.js +1291 -0
  31. package/dist/endpoints.d.ts +3 -0
  32. package/dist/endpoints.d.ts.map +1 -0
  33. package/dist/endpoints.js +7 -0
  34. package/dist/endpoints.js.map +1 -0
  35. package/dist/events.d.ts +9 -0
  36. package/dist/events.d.ts.map +1 -0
  37. package/dist/events.js +35 -0
  38. package/dist/events.js.map +1 -0
  39. package/dist/graphql.d.ts +18 -0
  40. package/dist/graphql.d.ts.map +1 -0
  41. package/dist/graphql.js +37 -0
  42. package/dist/graphql.js.map +1 -0
  43. package/dist/index.d.ts +9 -0
  44. package/dist/index.d.ts.map +1 -0
  45. package/dist/index.js +9 -0
  46. package/dist/index.js.map +1 -0
  47. package/dist/rtcServers.d.ts +13 -0
  48. package/dist/rtcServers.d.ts.map +1 -0
  49. package/dist/rtcServers.js +31 -0
  50. package/dist/rtcServers.js.map +1 -0
  51. package/dist/sdp.d.ts +6 -0
  52. package/dist/sdp.d.ts.map +1 -0
  53. package/dist/sdp.js +34 -0
  54. package/dist/sdp.js.map +1 -0
  55. package/dist/signaling.d.ts +33 -0
  56. package/dist/signaling.d.ts.map +1 -0
  57. package/dist/signaling.js +142 -0
  58. package/dist/signaling.js.map +1 -0
  59. package/dist/tts.d.ts +111 -0
  60. package/dist/tts.d.ts.map +1 -0
  61. package/dist/tts.js +223 -0
  62. package/dist/tts.js.map +1 -0
  63. package/dist/types.d.ts +194 -0
  64. package/dist/types.d.ts.map +1 -0
  65. package/dist/types.js +23 -0
  66. package/dist/types.js.map +1 -0
  67. package/dist/upload.d.ts +46 -0
  68. package/dist/upload.d.ts.map +1 -0
  69. package/dist/upload.js +82 -0
  70. package/dist/upload.js.map +1 -0
  71. package/package.json +57 -0
  72. package/src/client.ts +839 -0
  73. package/src/endpoints.ts +8 -0
  74. package/src/events.ts +38 -0
  75. package/src/graphql.ts +58 -0
  76. package/src/index.ts +50 -0
  77. package/src/rtcServers.ts +38 -0
  78. package/src/sdp.ts +45 -0
  79. package/src/signaling.ts +172 -0
  80. package/src/tts.ts +364 -0
  81. package/src/types.ts +201 -0
  82. package/src/upload.ts +132 -0
package/src/client.ts ADDED
@@ -0,0 +1,839 @@
1
+ import { TypedEmitter } from "./events.js";
2
+ import { DEFAULT_GRAPHQL_URL, DEFAULT_SIGNALING_URL } from "./endpoints.js";
3
+ import { applyOpusSdpOptions } from "./sdp.js";
4
+ import { SignalingChannel } from "./signaling.js";
5
+ import {
6
+ DEFAULT_STUN_SERVERS,
7
+ fetchRTCServers,
8
+ } from "./rtcServers.js";
9
+ import { uploadAudio } from "./upload.js";
10
+ import {
11
+ type ConnectionState,
12
+ type ConnectionStats,
13
+ type ConnectOptions,
14
+ type ConvbasedClientOptions,
15
+ type FileInferencePreferences,
16
+ type IncomingMessage,
17
+ type RTCPreferences,
18
+ type RTCServersConfig,
19
+ RTCStatusCode,
20
+ type ServerMessageEvent,
21
+ type TaskStatus,
22
+ } from "./types.js";
23
+
24
/** Payload of the `taskAck` event, built from a server `task_ack` frame. */
export interface TaskAckEvent {
  /** Id of the acknowledged task (`task_id` on the wire). */
  taskId: string;
  /** Acknowledgement status reported by the server. */
  status: "queued" | "started";
  /** Forwarded from the frame's `queue_position` field, when present. */
  queuePosition?: number;
  /** Status code forwarded from the frame, when present. */
  code?: number;
}
30
+
31
/** Payload of the `taskProgress` event, built from a server `task_progress` frame. */
export interface TaskProgressEvent {
  /** Id of the task this update belongs to (`task_id` on the wire). */
  taskId: string;
  /** Completed fraction, in the range [0, 1]. */
  progress: number;
  /** Status code forwarded from the frame, when present. */
  code?: number;
}
37
+
38
/** Payload of the `taskFinished` event, built from a server `task_finished` frame. */
export interface TaskFinishedEvent {
  /** Id of the task that ended (`task_id` on the wire). */
  taskId: string;
  /** Final status reported by the server. */
  status: TaskStatus;
  /** On success: COS key of the converted audio. */
  resultKey?: string;
  /** On success: presigned URL from which the converted audio can be downloaded. */
  downloadUrl?: string;
  /** On failure: the error text reported by the server. */
  error?: string;
  /** Status code forwarded from the frame, when present. */
  code?: number;
}
49
+
50
/** Arguments accepted by `ConvbasedClient.startTask`. */
export interface StartTaskOptions {
  /** COS key of the source audio to convert (obtain one via `uploadAudio`). */
  audioKey: string;
  /** Caller-chosen task id. A fresh id is generated when this is omitted. */
  taskId?: string;
  /** Base name of the generated output file. Defaults to `"output"`. */
  generateName?: string;
  /** Container format of the output. Defaults to `"wav"`. */
  format?: string;
  /** Conversion parameters that apply to this task only. */
  preferences?: FileInferencePreferences;
}
62
+
63
/**
 * Arguments accepted by `ConvbasedClient.runFileInference`. Same as
 * `StartTaskOptions`, except that the source may be given either as an
 * already-uploaded key or as raw audio to upload first.
 */
export interface RunFileInferenceOptions
  extends Omit<StartTaskOptions, "audioKey"> {
  /** COS key of source audio that is already uploaded. Supply this or `audio`. */
  audioKey?: string;
  /** Source audio (`Blob`/`File`) to upload before starting. Supply this or `audioKey`. */
  audio?: Blob;
  /** Maximum time to wait for the result, in ms. Defaults to 300_000 (5 min). */
  timeoutMs?: number;
  /** Aborting this signal stops waiting and asks the server to stop the task. */
  signal?: AbortSignal;
  /** Invoked for every `task_progress` frame of this task. */
  onProgress?: (event: TaskProgressEvent) => void;
  /** Invoked for the `task_ack` frame of this task. */
  onAck?: (event: TaskAckEvent) => void;
}
78
+
79
/** Event name → payload map for everything `ConvbasedClient` emits. */
type ClientEvents = {
  /** Connection state transition (new state plus the one it replaced). */
  state: { state: ConnectionState; previous: ConnectionState };
  /** Every frame received on the signaling channel. */
  message: ServerMessageEvent;
  /** The server reported `SERVICE_READY`. */
  ready: { code: number; message?: string };
  /** A remote track arrived on the peer connection. */
  track: { stream: MediaStream; track: MediaStreamTrack };
  /** A client- or server-side failure. */
  error: Error;
  /** The session ended; `code`/`reason` are set when the signaling socket closed. */
  closed: { code?: number; reason?: string };
  /** A `task_ack` frame was received. */
  taskAck: TaskAckEvent;
  /** A `task_progress` frame was received. */
  taskProgress: TaskProgressEvent;
  /** A `task_finished` frame was received. */
  taskFinished: TaskFinishedEvent;
};
90
+
91
+ /**
92
+ * Real-time voice conversion client. Mirrors the flow used by Convbased-Web:
93
+ *
94
+ * 1. Open WebSocket to `${signalingUrl}/signaling/ws?api_key=…`.
95
+ * 2. Capture the microphone (or accept a user-provided `MediaStream`).
96
+ * 3. Build an `RTCPeerConnection`, attach the mic track, mangle the offer's
97
+ * Opus parameters, and send `{type: "offer", sdp, preferences}` over the
98
+ * signaling socket.
99
+ * 4. Apply the `answer` and `ice_candidate` messages from the server, fire
100
+ * local ICE candidates back over signaling.
101
+ * 5. When the server sends `{code: SERVICE_READY}` the processed track is
102
+ * emitted via the `track` event — wire it to an `<audio>` element to hear
103
+ * converted audio.
104
+ *
105
+ * The client is single-use: call `connect()` once, then `disconnect()` to
106
+ * tear everything down. Create a fresh instance for a new session.
107
+ */
108
+ export class ConvbasedClient extends TypedEmitter<ClientEvents> {
109
+ private readonly opts: Omit<
110
+ ConvbasedClientOptions,
111
+ "signalingUrl" | "graphqlUrl"
112
+ > & {
113
+ signalingUrl: string;
114
+ graphqlUrl: string | false;
115
+ iceTransportPolicy: RTCIceTransportPolicy;
116
+ bitrate: number;
117
+ stereo: boolean;
118
+ signalingTimeoutMs: number;
119
+ connectTimeoutMs: number;
120
+ };
121
+ private readonly logger: Required<NonNullable<ConvbasedClientOptions["logger"]>>;
122
+
123
+ private state: ConnectionState = "idle";
124
+ private signaling: SignalingChannel | null = null;
125
+ private pc: RTCPeerConnection | null = null;
126
+ private localStream: MediaStream | null = null;
127
+ private convertedStream: MediaStream | null = null;
128
+ private serviceReadyTimer: ReturnType<typeof setTimeout> | null = null;
129
+ private offerInFlight = false;
130
+
131
/**
 * Validate credentials and resolve every option to its effective value.
 *
 * @param options Client configuration; at least one of `apiKey` /
 *   `accessToken` must be set.
 * @throws Error when neither credential is provided.
 */
constructor(options: ConvbasedClientOptions) {
  super();
  if (!options.apiKey && !options.accessToken) {
    throw new Error(
      "ConvbasedClient requires either `apiKey` or `accessToken`"
    );
  }
  this.opts = {
    ...options,
    // Defaults are applied per field with `??` (instead of spreading the
    // options over a defaults object) so that an option passed explicitly as
    // `undefined` still falls back to its default rather than erasing it.
    iceTransportPolicy: options.iceTransportPolicy ?? "all",
    bitrate: options.bitrate ?? 64,
    stereo: options.stereo ?? false,
    signalingTimeoutMs: options.signalingTimeoutMs ?? 120_000,
    connectTimeoutMs: options.connectTimeoutMs ?? 20_000,
    // Endpoints fall back to the baked-in Convbased production URLs.
    signalingUrl: options.signalingUrl ?? DEFAULT_SIGNALING_URL,
    // `false` is preserved: it explicitly disables the GraphQL endpoint.
    graphqlUrl:
      options.graphqlUrl === false
        ? false
        : (options.graphqlUrl ?? DEFAULT_GRAPHQL_URL),
  };
  // Missing logger methods: debug/info are silent, warn/error go to console.
  const provided = options.logger ?? {};
  this.logger = {
    debug: provided.debug ?? (() => {}),
    info: provided.info ?? (() => {}),
    warn: provided.warn ?? console.warn.bind(console),
    error: provided.error ?? console.error.bind(console),
  };
}
160
+
161
/** Current connection state of this client. */
getState(): ConnectionState {
  const { state } = this;
  return state;
}
164
+
165
/**
 * The converted (voice-changed) audio stream received from the inference
 * node, or `null` when no remote track has arrived or the peer was torn down.
 */
getConvertedStream(): MediaStream | null {
  const { convertedStream } = this;
  return convertedStream;
}
169
+
170
/** The underlying `RTCPeerConnection`, or `null` when none is active. */
getPeerConnection(): RTCPeerConnection | null {
  const { pc } = this;
  return pc;
}
173
+
174
/**
 * Open the signaling socket, capture audio, and run WebRTC negotiation.
 * Resolves when the server emits SERVICE_READY (audio is flowing both ways).
 * Rejects on auth failures, signaling errors, or negotiation timeouts.
 *
 * On failure the error is broadcast through the `error` event exactly once,
 * the session is torn down, and the client ends in the `"error"` state.
 *
 * @param opts Session options; `modelId` is mandatory.
 * @throws Error when called from any state other than `"idle"`, when
 *   `modelId` is missing, or when any connection step fails.
 */
async connect(opts: ConnectOptions): Promise<void> {
  if (this.state !== "idle") {
    throw new Error(
      `ConvbasedClient.connect() called from invalid state "${this.state}"`
    );
  }
  if (!opts.modelId) {
    throw new Error("ConnectOptions.modelId is required");
  }

  // Track errors that were already broadcast while connecting (server error
  // frames, signaling close, ...). Those reach the catch block below as the
  // rejection reason and must not be emitted a second time.
  const alreadyEmitted = new WeakSet<Error>();
  const stopTracking = this.on("error", (e) => {
    alreadyEmitted.add(e);
  });

  this.setState("signaling");
  try {
    await this.openSignaling();
    const iceServers = await this.resolveIceServers();
    this.setState("negotiating");
    await this.openPeer(opts, iceServers);
    await this.waitForServiceReady();
    this.setState("connected");
  } catch (err) {
    const error = err instanceof Error ? err : new Error(String(err));
    if (!alreadyEmitted.has(error)) this.emit("error", error);
    await this.disconnect().catch(() => {});
    this.setState("error");
    throw error;
  } finally {
    stopTracking();
  }
}
205
+
206
/**
 * Push a runtime configuration change to the inference node as a `config`
 * frame. This is the SDK counterpart of the pitch / formant / RMS sliders in
 * Convbased-Web; only the fields present in `preferences` are forwarded.
 *
 * @throws Error when the signaling channel is not open.
 */
updateConfig(preferences: Partial<RTCPreferences>): void {
  const channel = this.signaling;
  if (!channel || !channel.isOpen) {
    throw new Error("Cannot updateConfig: signaling channel is closed");
  }
  channel.send({ type: "config", preferences });
}
217
+
218
+ // ---------------------------------------------------------------------
219
+ // File inference (voice-to-voice) — convert an uploaded audio file within
220
+ // the current live session. Requires the client to be `connected` first;
221
+ // the inference node is provisioned by `connect()`.
222
+ // ---------------------------------------------------------------------
223
+
224
/**
 * Upload source audio and resolve with its COS key, which can then be handed
 * to `startTask` / `runFileInference`. A GraphQL endpoint is required: either
 * the default production one or a self-hosted override, but not `false`.
 *
 * @param file Audio `Blob`/`File` to upload.
 * @param opts Optional file name, content type and abort signal, forwarded
 *   as-is to the upload helper.
 * @throws Error when the client was configured with `graphqlUrl: false`.
 */
async uploadAudio(
  file: Blob,
  opts?: { filename?: string; contentType?: string; signal?: AbortSignal }
): Promise<{ key: string }> {
  const { graphqlUrl, apiKey, accessToken } = this.opts;
  if (typeof graphqlUrl !== "string") {
    throw new Error(
      "uploadAudio requires a GraphQL endpoint; do not set `graphqlUrl: false`"
    );
  }
  // Inside the class body `uploadAudio` resolves to the imported module
  // function, not to this method.
  return uploadAudio({
    graphqlUrl,
    apiKey,
    accessToken,
    file,
    filename: opts?.filename,
    contentType: opts?.contentType,
    signal: opts?.signal,
  });
}
248
+
249
/**
 * Send a `task_start` frame for a file-inference task and return the task id.
 * The outcome is delivered later through the `taskAck` / `taskProgress` /
 * `taskFinished` events; `runFileInference` wraps those in a promise.
 *
 * @throws Error when the client is not `connected`, the signaling channel is
 *   not open, or `audioKey` is missing.
 */
startTask(opts: StartTaskOptions): string {
  const { state, signaling } = this;
  if (state !== "connected") {
    throw new Error(
      `startTask requires a connected session (current state "${state}")`
    );
  }
  if (!signaling || !signaling.isOpen) {
    throw new Error("Cannot startTask: signaling channel is closed");
  }
  if (!opts.audioKey) {
    throw new Error("StartTaskOptions.audioKey is required");
  }

  // Honour a caller-supplied id, otherwise mint one.
  const taskId = opts.taskId ?? generateTaskId();
  const generateName = opts.generateName ?? "output";
  const format = opts.format ?? "wav";
  signaling.send({
    type: "task_start",
    task_id: taskId,
    audio_key: opts.audioKey,
    generate_name: generateName,
    format,
    preferences: opts.preferences,
  });
  return taskId;
}
278
+
279
/**
 * Ask the server to cancel a file-inference task by sending a `task_stop`
 * frame. When `taskId` is omitted the current task is the one stopped.
 *
 * @throws Error when the signaling channel is not open.
 */
stopTask(taskId?: string): void {
  const channel = this.signaling;
  if (!channel || !channel.isOpen) {
    throw new Error("Cannot stopTask: signaling channel is closed");
  }
  channel.send({ type: "task_stop", task_id: taskId });
}
286
+
287
/**
 * Promise-based file inference: (optionally upload the source audio,) start
 * the task, and resolve with the `taskFinished` event on success. Rejects on
 * task failure/cancellation, timeout, signaling close, or `signal` abort.
 *
 * The connection state and the abort signal are checked before any upload so
 * that a call which cannot succeed does not transfer the file first.
 *
 * @param opts Source (`audioKey` or `audio`), task parameters, timeout,
 *   abort signal and progress callbacks.
 * @returns The `taskFinished` event of the successful task.
 * @throws Error when neither source is given or the client is not connected;
 *   `AbortError` (`DOMException`) when `signal` is aborted.
 */
async runFileInference(
  opts: RunFileInferenceOptions
): Promise<TaskFinishedEvent> {
  if (!opts.audioKey && !opts.audio) {
    throw new Error(
      "runFileInference requires either `audioKey` or `audio`"
    );
  }
  // Same condition and message `startTask` would report, raised early so the
  // upload below is skipped when the task could not be started anyway.
  if (this.state !== "connected") {
    throw new Error(
      `startTask requires a connected session (current state "${this.state}")`
    );
  }
  const { signal } = opts;
  if (signal?.aborted) {
    throw new DOMException("Aborted", "AbortError");
  }

  // An empty `audioKey` counts as "not provided", consistently with the
  // validation above, so the supplied `audio` is uploaded in that case.
  let resolvedKey = opts.audioKey;
  if (!resolvedKey) {
    if (!opts.audio) {
      throw new Error(
        "runFileInference requires either `audioKey` or `audio`"
      );
    }
    resolvedKey = (await this.uploadAudio(opts.audio, { signal })).key;
  }
  const audioKey: string = resolvedKey;
  const timeoutMs = opts.timeoutMs ?? 300_000;

  return new Promise<TaskFinishedEvent>((resolve, reject) => {
    // The signal may have been aborted while the upload was finishing.
    if (signal?.aborted) {
      reject(new DOMException("Aborted", "AbortError"));
      return;
    }

    let taskId: string | undefined;
    let settled = false;
    const subscriptions: Array<() => void> = [];

    // Release every listener and the timer; runs once, on settlement.
    const cleanup = () => {
      for (const off of subscriptions) off();
      clearTimeout(timer);
      signal?.removeEventListener("abort", onAbort);
    };
    const succeed = (event: TaskFinishedEvent) => {
      if (settled) return;
      settled = true;
      cleanup();
      resolve(event);
    };
    const fail = (err: Error) => {
      if (settled) return;
      settled = true;
      cleanup();
      reject(err);
    };
    const onAbort = () => {
      // Best effort: the channel may already be closed.
      try {
        this.stopTask(taskId);
      } catch {
        /* ignore */
      }
      fail(new DOMException("Aborted", "AbortError"));
    };

    subscriptions.push(
      this.on("taskAck", (e) => {
        if (e.taskId === taskId) opts.onAck?.(e);
      }),
      this.on("taskProgress", (e) => {
        if (e.taskId === taskId) opts.onProgress?.(e);
      }),
      this.on("taskFinished", (e) => {
        if (e.taskId !== taskId) return;
        if (e.status === "success") {
          succeed(e);
        } else {
          fail(new Error(e.error || `File inference task ${e.status}`));
        }
      }),
      this.on("error", (err) => fail(err)),
      this.on("closed", () =>
        fail(new Error("Session closed before task finished"))
      )
    );
    signal?.addEventListener("abort", onAbort, { once: true });

    const timer = setTimeout(() => {
      fail(
        new Error(
          `Timed out waiting for file inference task after ${timeoutMs}ms`
        )
      );
    }, timeoutMs);

    // Listeners are attached before the frame is sent so no reply is missed.
    try {
      taskId = this.startTask({
        audioKey,
        taskId: opts.taskId,
        generateName: opts.generateName,
        format: opts.format,
        preferences: opts.preferences,
      });
    } catch (err) {
      fail(err instanceof Error ? err : new Error(String(err)));
    }
  });
}
388
+
389
/**
 * Mute / unmute the local input by toggling `enabled` on the captured audio
 * tracks. Every audio track of the local stream is toggled, because all of
 * them are attached to the peer connection when the session is opened.
 * Does nothing when there is no local stream.
 *
 * @param muted `true` to silence the outgoing audio, `false` to restore it.
 */
setMuted(muted: boolean): void {
  const tracks = this.localStream?.getAudioTracks() ?? [];
  for (const track of tracks) {
    track.enabled = !muted;
  }
}
394
+
395
/**
 * Replace the local input stream (mic) with a different `MediaStream`
 * (hot-swap). The first audio track of `newStream` is installed on the
 * existing audio sender, then the tracks of the previous stream are stopped.
 * Tracks that also belong to `newStream` are left running, so passing the
 * current stream again (or a stream sharing its track) does not cut the audio.
 *
 * @throws Error when there is no peer connection, `newStream` has no audio
 *   track, or the peer connection has no audio sender.
 */
async replaceLocalStream(newStream: MediaStream): Promise<void> {
  if (!this.pc) throw new Error("PeerConnection is not active");
  const newTrack = newStream.getAudioTracks()[0];
  if (!newTrack) throw new Error("Replacement stream has no audio track");
  const sender = this.pc
    .getSenders()
    .find((s) => s.track?.kind === "audio");
  if (!sender) throw new Error("No audio sender on PeerConnection");

  const previous = this.localStream;
  await sender.replaceTrack(newTrack);

  if (previous && previous !== newStream) {
    const stillInUse = new Set(newStream.getTracks());
    for (const oldTrack of previous.getTracks()) {
      if (stillInUse.has(oldTrack)) continue;
      try {
        oldTrack.stop();
      } catch {
        /* ignore */
      }
    }
  }
  this.localStream = newStream;
}
408
+
409
/**
 * Snapshot of jitter / loss / RTT (sampled from `RTCPeerConnection.getStats`).
 *
 * - `rttMs` comes from a succeeded candidate pair, preferring the nominated
 *   pair; when no succeeded pair is nominated, the last one reported is used.
 * - `jitter` and `packetsLost` come from the inbound audio RTP entry.
 *
 * Fields the report does not provide stay at 0.
 *
 * @returns The stats, or `null` when the peer connection is missing or not
 *   in the `"connected"` state.
 */
async getStats(): Promise<ConnectionStats | null> {
  const pc = this.pc;
  if (!pc || pc.connectionState !== "connected") return null;

  const report = await pc.getStats();
  const stats: ConnectionStats = { rttMs: 0, jitter: 0, packetsLost: 0 };
  let rttFromNominatedPair = false;

  report.forEach((entry: Record<string, unknown>) => {
    if (entry.type === "candidate-pair" && entry.state === "succeeded") {
      const rttSeconds = entry.currentRoundTripTime;
      if (typeof rttSeconds === "number") {
        const nominated = entry.nominated === true;
        // Never let a non-nominated pair overwrite the nominated pair's RTT.
        if (nominated || !rttFromNominatedPair) {
          stats.rttMs = Math.round(rttSeconds * 1000);
          rttFromNominatedPair = rttFromNominatedPair || nominated;
        }
      }
    }
    if (entry.type === "inbound-rtp" && entry.kind === "audio") {
      const jitter = entry.jitter;
      const packetsLost = entry.packetsLost;
      if (typeof jitter === "number") stats.jitter = jitter;
      if (typeof packetsLost === "number") stats.packetsLost = packetsLost;
    }
  });
  return stats;
}
431
+
432
/**
 * Gracefully end the session: tell the server we are leaving, then close the
 * peer connection and the signaling socket. A no-op when the client is
 * already `closing` or `closed`. Always finishes in the `closed` state and
 * emits `closed`.
 */
async disconnect(): Promise<void> {
  const alreadyEnding = this.state === "closing" || this.state === "closed";
  if (alreadyEnding) return;

  this.setState("closing");
  this.clearServiceReadyTimer();

  try {
    const channel = this.signaling;
    if (channel?.isOpen) {
      try {
        channel.send({ type: "exit" });
        // Leave a short window for the frame to flush before closing the socket.
        await new Promise((done) => setTimeout(done, 25));
      } catch {
        /* socket already gone */
      }
    }
  } finally {
    this.tearDownPeer();
    this.signaling?.close();
    this.signaling = null;
    this.setState("closed");
    this.emit("closed", {});
  }
}
456
+
457
+ // ---------------------------------------------------------------------
458
+ // Internal
459
+ // ---------------------------------------------------------------------
460
+
461
/**
 * Create the signaling channel from the client options, connect it with this
 * client's handlers, and store it on `this.signaling` once the connection
 * attempt has resolved.
 */
private async openSignaling(): Promise<void> {
  const { signalingUrl, apiKey, accessToken, connectTimeoutMs } = this.opts;
  const channel = new SignalingChannel({
    signalingUrl,
    apiKey,
    accessToken,
    connectTimeoutMs,
    logger: this.logger,
  });

  await channel.connect({
    onMessage: (msg) => this.handleSignalingMessage(msg),
    onClose: (e) => this.handleSignalingClose(e),
    // Socket-level error events are only logged here.
    onError: (e) =>
      this.logger.warn?.("[convbased-sdk] signaling error event", e),
  });

  this.signaling = channel;
}
477
+
478
/**
 * Pick the ICE servers for the peer connection, in order of preference:
 * 1. servers passed explicitly in the client options,
 * 2. servers fetched from the GraphQL endpoint (when one is configured and
 *    the response lists at least one URL),
 * 3. the default STUN servers.
 *
 * A failed fetch is logged as a warning and falls through to step 3.
 */
private async resolveIceServers(): Promise<RTCServersConfig[]> {
  const { iceServers, graphqlUrl, apiKey, accessToken } = this.opts;
  if (iceServers?.length) return iceServers;
  if (typeof graphqlUrl !== "string") return DEFAULT_STUN_SERVERS;

  try {
    const fetched = await fetchRTCServers({ graphqlUrl, apiKey, accessToken });
    if (fetched.urls?.length) return [fetched];
  } catch (cause) {
    this.logger.warn?.(
      "[convbased-sdk] fetchRTCServers failed, falling back to STUN:",
      cause
    );
  }
  return DEFAULT_STUN_SERVERS;
}
497
+
498
/**
 * Build the `RTCPeerConnection`, attach the local audio, and send the SDP
 * offer (with the session preferences) over the signaling channel.
 *
 * @param opts Connect options: audio source, model id, optional sample rate
 *   and preferences.
 * @param iceServers ICE servers to configure the peer connection with.
 * @throws Error when the session is torn down while the local stream is being
 *   acquired, or when the signaling channel is no longer open at the time the
 *   offer is ready. Failures of the underlying WebRTC / media calls propagate.
 */
private async openPeer(
  opts: ConnectOptions,
  iceServers: RTCServersConfig[]
): Promise<void> {
  const pc = new RTCPeerConnection({
    iceServers: iceServers as RTCIceServer[],
    iceTransportPolicy: this.opts.iceTransportPolicy,
  });
  this.pc = pc;

  // Trickle local ICE candidates to the server as they are gathered.
  pc.onicecandidate = (event) => {
    if (event.candidate && this.signaling?.isOpen) {
      this.signaling.send({
        type: "ice_candidate",
        candidate: event.candidate.toJSON(),
      });
    }
  };
  // Expose the remote track; wrap it in a stream if none was signalled.
  pc.ontrack = (event) => {
    const stream = event.streams[0] ?? new MediaStream([event.track]);
    this.convertedStream = stream;
    this.emit("track", { stream, track: event.track });
  };
  pc.onconnectionstatechange = () => {
    const cs = pc.connectionState;
    this.logger.debug?.("[convbased-sdk] pc state:", cs);
    if (cs === "connecting") {
      if (this.state === "negotiating") this.setState("connecting");
    } else if (cs === "failed" || cs === "disconnected" || cs === "closed") {
      // Unless we are already shutting down, a lost peer ends the session.
      if (this.state !== "closing" && this.state !== "closed") {
        this.emit(
          "error",
          new Error(`PeerConnection entered "${cs}" state`)
        );
        void this.disconnect();
      }
    }
  };

  const localStream = await this.acquireLocalStream(opts.audio);
  // Acquiring the stream can take a long time (e.g. a permission prompt). If
  // the session was torn down meanwhile, teardown has already run and would
  // never see this stream, so release it here instead of leaving it live.
  if (this.pc !== pc) {
    this.stopTracks(localStream);
    throw new Error("Session closed while acquiring the local audio stream");
  }
  this.localStream = localStream;
  for (const track of localStream.getAudioTracks()) {
    pc.addTrack(track, localStream);
  }

  const offer = await pc.createOffer({ offerToReceiveAudio: true });
  // Rewrite the Opus parameters of the offer before it becomes the local
  // description, so the same SDP is applied locally and sent to the server.
  offer.sdp = applyOpusSdpOptions(offer.sdp ?? "", {
    bitrateKbps: this.opts.bitrate,
    stereo: this.opts.stereo,
  });
  await pc.setLocalDescription(offer);

  // Explicit option first, then the rate reported by the track, then 48 kHz.
  const sampleRate =
    opts.sampleRate ?? detectSampleRate(localStream) ?? 48000;

  // `model_id` and `sample_rate` always override caller-supplied preferences.
  const preferences: RTCPreferences = {
    ...(opts.preferences ?? {}),
    model_id: opts.modelId,
    sample_rate: sampleRate,
  };

  if (!this.signaling?.isOpen) {
    throw new Error("Signaling channel closed before offer was sent");
  }
  this.offerInFlight = true;
  this.signaling.send({ type: "offer", sdp: offer.sdp, preferences });
}
566
+
567
/**
 * Produce the local input stream. A `MediaStream` passed by the caller is
 * used as-is; anything else is treated as `getUserMedia` audio constraints
 * (`true` when nothing was passed).
 *
 * @throws Error when capture is needed but
 *   `navigator.mediaDevices.getUserMedia` is not available.
 */
private async acquireLocalStream(
  audio: ConnectOptions["audio"]
): Promise<MediaStream> {
  if (audio instanceof MediaStream) return audio;

  const canCapture =
    typeof navigator !== "undefined" &&
    Boolean(navigator.mediaDevices?.getUserMedia);
  if (!canCapture) {
    throw new Error(
      "navigator.mediaDevices.getUserMedia is unavailable — pass a MediaStream via `audio`"
    );
  }

  const audioConstraints =
    audio === undefined ? true : (audio as boolean | MediaTrackConstraints);
  const constraints: MediaStreamConstraints = {
    audio: audioConstraints,
    video: false,
  };
  return navigator.mediaDevices.getUserMedia(constraints);
}
585
+
586
/**
 * Wait for the `ready` event. The returned promise resolves on `ready` and
 * rejects on the first of: an `error` event, a `closed` event, or the
 * `signalingTimeoutMs` timer (kept in `serviceReadyTimer`) firing. Whatever
 * the outcome, the three listeners are removed and the timer is cleared.
 */
private waitForServiceReady(): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const unsubscribers: Array<() => void> = [];

    const release = () => {
      unsubscribers.forEach((unsubscribe) => unsubscribe());
      this.clearServiceReadyTimer();
    };
    const failWith = (reason: Error) => {
      release();
      reject(reason);
    };

    unsubscribers.push(
      this.on("ready", () => {
        release();
        resolve();
      }),
      this.on("error", (err) => failWith(err)),
      this.on("closed", () =>
        failWith(new Error("Signaling closed before SERVICE_READY"))
      )
    );

    this.serviceReadyTimer = setTimeout(
      () =>
        failWith(
          new Error(
            `Timed out waiting for SERVICE_READY after ${this.opts.signalingTimeoutMs}ms`
          )
        ),
      this.opts.signalingTimeoutMs
    );
  });
}
618
+
619
/** Cancel the pending SERVICE_READY timeout, if any, and forget its handle. */
private clearServiceReadyTimer(): void {
  const pending = this.serviceReadyTimer;
  if (!pending) return;
  clearTimeout(pending);
  this.serviceReadyTimer = null;
}
625
+
626
/**
 * Dispatch one frame received on the signaling channel.
 *
 * Every frame is first re-emitted through the generic `message` event. It is
 * then routed by its `type`:
 * - `answer` / `ice_candidate` are applied to the peer connection,
 * - `task_ack` / `task_progress` / `task_finished` are re-emitted as the
 *   camel-cased `taskAck` / `taskProgress` / `taskFinished` events,
 * - `message` is interpreted through its status `code`,
 * - `shutdown` / `error` emit `error` and end the session,
 * - `ping` is answered with `pong`.
 *
 * Any other type (including `pong`) gets no handling beyond `message`.
 */
private handleSignalingMessage(msg: IncomingMessage): void {
  const frame = msg as { type?: string; code?: number; message?: string };

  this.emit("message", {
    code: frame.code,
    message: frame.message,
    raw: msg,
  });

  switch (frame.type) {
    case "answer":
      void this.applyAnswer(msg as { sdp: string });
      break;

    case "ice_candidate":
      void this.applyRemoteCandidate(
        msg as { candidate: RTCIceCandidateInit }
      );
      break;

    case "task_ack": {
      const ack = msg as {
        task_id: string;
        status: "queued" | "started";
        queue_position?: number;
        code?: number;
      };
      this.emit("taskAck", {
        taskId: ack.task_id,
        status: ack.status,
        queuePosition: ack.queue_position,
        code: ack.code,
      });
      break;
    }

    case "task_progress": {
      const update = msg as {
        task_id: string;
        progress: number;
        code?: number;
      };
      this.emit("taskProgress", {
        taskId: update.task_id,
        progress: update.progress,
        code: update.code,
      });
      break;
    }

    case "task_finished": {
      const result = msg as {
        task_id: string;
        status: TaskStatus;
        result_key?: string;
        download_url?: string;
        error?: string;
        code?: number;
      };
      this.emit("taskFinished", {
        taskId: result.task_id,
        status: result.status,
        resultKey: result.result_key,
        downloadUrl: result.download_url,
        error: result.error,
        code: result.code,
      });
      break;
    }

    case "message": {
      const code = frame.code;
      const text = frame.message;
      switch (code) {
        case RTCStatusCode.SERVICE_READY:
          this.emit("ready", { code, message: text });
          break;

        // Codes that are surfaced as an `error` event.
        case RTCStatusCode.ERROR:
        case RTCStatusCode.GPU_INSUFFICIENT:
        case RTCStatusCode.UNPAID_SERVICE:
        case RTCStatusCode.MODEL_NOT_FOUND:
        case RTCStatusCode.DUPLICATE_CONNECTION:
        case RTCStatusCode.REQUEST_TOO_FAST:
          this.emit(
            "error",
            new Error(text || `Server reported error code ${code}`)
          );
          break;

        case RTCStatusCode.SHUTDOWN:
          void this.disconnect();
          break;

        default:
          break;
      }
      break;
    }

    case "shutdown":
    case "error":
      this.emit(
        "error",
        new Error(frame.message || `Server sent "${frame.type}"`)
      );
      void this.disconnect();
      break;

    case "ping":
      if (this.signaling?.isOpen) this.signaling.send({ type: "pong" });
      break;

    default:
      // "pong" and unrecognised frame types: already surfaced via the
      // generic "message" event above, nothing else to do.
      break;
  }
}
740
+
741
/**
 * Install the server's SDP answer as the remote description and clear the
 * `offerInFlight` flag. A failure is reported through the `error` event
 * rather than thrown. Does nothing when there is no peer connection.
 */
private async applyAnswer(msg: { sdp: string }): Promise<void> {
  const peer = this.pc;
  if (!peer) return;

  const answer: RTCSessionDescriptionInit = { type: "answer", sdp: msg.sdp };
  try {
    await peer.setRemoteDescription(answer);
    this.offerInFlight = false;
  } catch (cause) {
    const failure = new Error(
      `Failed to apply remote answer: ${describeErr(cause)}`
    );
    this.emit("error", failure);
  }
}
753
+
754
/**
 * Add an ICE candidate received from the server to the peer connection.
 * A failure is only logged as a warning. Does nothing when there is no peer
 * connection.
 */
private async applyRemoteCandidate(msg: {
  candidate: RTCIceCandidateInit;
}): Promise<void> {
  const peer = this.pc;
  if (!peer) return;

  try {
    await peer.addIceCandidate(msg.candidate);
  } catch (cause) {
    this.logger.warn?.("[convbased-sdk] addIceCandidate failed:", cause);
  }
}
764
+
765
/**
 * React to the signaling socket closing on its own (not through
 * `disconnect()`, which is recognised by the `closing` / `closed` state and
 * ignored here).
 *
 * Order of effects:
 * 1. the peer is torn down and the state becomes `closed`, so a listener
 *    that calls `disconnect()` in response finds nothing left to do and no
 *    second `closed` event is produced;
 * 2. for any close code other than 1000, an `error` describing the close is
 *    emitted;
 * 3. `closed` is emitted with the socket's code and reason.
 *
 * `error` is emitted before `closed` so that anything waiting on both events
 * settles with the specific close reason rather than a generic "closed".
 */
private handleSignalingClose(event: CloseEvent): void {
  if (this.state === "closing" || this.state === "closed") return;

  let failure: Error | null = null;
  // 1008 is what the server sends for auth + balance failures.
  if (event.code === 1008) {
    failure = new Error(
      `Signaling rejected the connection: ${event.reason || "policy violation"}`
    );
  } else if (event.code !== 1000) {
    failure = new Error(
      `Signaling closed unexpectedly (code=${event.code}, reason=${event.reason || "?"})`
    );
  }

  this.tearDownPeer();
  this.setState("closed");

  if (failure) this.emit("error", failure);
  this.emit("closed", { code: event.code, reason: event.reason });
}
787
+
788
/**
 * Release all peer-side resources: detach the handlers from the peer
 * connection and close it, stop the local stream's tracks, and drop the
 * references to both streams. Safe to call when nothing is set up.
 */
private tearDownPeer(): void {
  const peer = this.pc;
  if (peer) {
    try {
      // Detach first so closing cannot call back into this client.
      peer.onicecandidate = null;
      peer.ontrack = null;
      peer.onconnectionstatechange = null;
      peer.close();
    } catch {
      /* ignore */
    }
    this.pc = null;
  }

  this.stopTracks(this.localStream);
  this.localStream = null;
  this.convertedStream = null;
}
804
+
805
/**
 * Stop every track of `stream`. A track whose `stop()` throws is skipped so
 * the remaining tracks are still stopped. `null` is accepted and ignored.
 */
private stopTracks(stream: MediaStream | null): void {
  if (!stream) return;
  for (const track of stream.getTracks()) {
    try {
      track.stop();
    } catch {
      /* ignore */
    }
  }
}
814
+
815
/**
 * Move to state `next` and announce the transition through the `state`
 * event. Nothing happens (and nothing is emitted) when `next` is already the
 * current state.
 */
private setState(next: ConnectionState): void {
  const previous = this.state;
  if (previous === next) return;
  this.state = next;
  this.emit("state", { state: next, previous });
}
821
+ }
822
+
823
/**
 * Create a task id: a UUID from `crypto.randomUUID()` when that API exists,
 * otherwise the current timestamp joined by `_` to a random hex string.
 */
function generateTaskId(): string {
  const hasNativeUuid =
    typeof crypto !== "undefined" && "randomUUID" in crypto;
  if (hasNativeUuid) {
    return crypto.randomUUID();
  }
  const timestamp = Date.now();
  const randomHex = Math.random().toString(16).slice(2);
  return `${timestamp}_${randomHex}`;
}
829
+
830
/**
 * Read the sample rate reported by the settings of the stream's first audio
 * track. Returns `null` when the stream has no audio track or the settings
 * carry no numeric `sampleRate`.
 */
function detectSampleRate(stream: MediaStream): number | null {
  const firstTrack = stream.getAudioTracks()[0];
  const settings = firstTrack?.getSettings();
  const reported = settings?.sampleRate;
  if (typeof reported !== "number") return null;
  return reported;
}
835
+
836
/** Text for a caught value: an `Error`'s message, else its `String()` form. */
function describeErr(e: unknown): string {
  return e instanceof Error ? e.message : String(e);
}