@convbased/sdk 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82) hide show
  1. package/LICENSE +21 -0
  2. package/README.md +235 -0
  3. package/dist/cjs/client.js +635 -0
  4. package/dist/cjs/client.js.map +1 -0
  5. package/dist/cjs/endpoints.js +10 -0
  6. package/dist/cjs/endpoints.js.map +1 -0
  7. package/dist/cjs/events.js +39 -0
  8. package/dist/cjs/events.js.map +1 -0
  9. package/dist/cjs/graphql.js +40 -0
  10. package/dist/cjs/graphql.js.map +1 -0
  11. package/dist/cjs/index.js +24 -0
  12. package/dist/cjs/index.js.map +1 -0
  13. package/dist/cjs/package.json +3 -0
  14. package/dist/cjs/rtcServers.js +35 -0
  15. package/dist/cjs/rtcServers.js.map +1 -0
  16. package/dist/cjs/sdp.js +37 -0
  17. package/dist/cjs/sdp.js.map +1 -0
  18. package/dist/cjs/signaling.js +146 -0
  19. package/dist/cjs/signaling.js.map +1 -0
  20. package/dist/cjs/tts.js +227 -0
  21. package/dist/cjs/tts.js.map +1 -0
  22. package/dist/cjs/types.js +26 -0
  23. package/dist/cjs/types.js.map +1 -0
  24. package/dist/cjs/upload.js +87 -0
  25. package/dist/cjs/upload.js.map +1 -0
  26. package/dist/client.d.ts +169 -0
  27. package/dist/client.d.ts.map +1 -0
  28. package/dist/client.js +631 -0
  29. package/dist/client.js.map +1 -0
  30. package/dist/convbased-sdk.global.js +1291 -0
  31. package/dist/endpoints.d.ts +3 -0
  32. package/dist/endpoints.d.ts.map +1 -0
  33. package/dist/endpoints.js +7 -0
  34. package/dist/endpoints.js.map +1 -0
  35. package/dist/events.d.ts +9 -0
  36. package/dist/events.d.ts.map +1 -0
  37. package/dist/events.js +35 -0
  38. package/dist/events.js.map +1 -0
  39. package/dist/graphql.d.ts +18 -0
  40. package/dist/graphql.d.ts.map +1 -0
  41. package/dist/graphql.js +37 -0
  42. package/dist/graphql.js.map +1 -0
  43. package/dist/index.d.ts +9 -0
  44. package/dist/index.d.ts.map +1 -0
  45. package/dist/index.js +9 -0
  46. package/dist/index.js.map +1 -0
  47. package/dist/rtcServers.d.ts +13 -0
  48. package/dist/rtcServers.d.ts.map +1 -0
  49. package/dist/rtcServers.js +31 -0
  50. package/dist/rtcServers.js.map +1 -0
  51. package/dist/sdp.d.ts +6 -0
  52. package/dist/sdp.d.ts.map +1 -0
  53. package/dist/sdp.js +34 -0
  54. package/dist/sdp.js.map +1 -0
  55. package/dist/signaling.d.ts +33 -0
  56. package/dist/signaling.d.ts.map +1 -0
  57. package/dist/signaling.js +142 -0
  58. package/dist/signaling.js.map +1 -0
  59. package/dist/tts.d.ts +111 -0
  60. package/dist/tts.d.ts.map +1 -0
  61. package/dist/tts.js +223 -0
  62. package/dist/tts.js.map +1 -0
  63. package/dist/types.d.ts +194 -0
  64. package/dist/types.d.ts.map +1 -0
  65. package/dist/types.js +23 -0
  66. package/dist/types.js.map +1 -0
  67. package/dist/upload.d.ts +46 -0
  68. package/dist/upload.d.ts.map +1 -0
  69. package/dist/upload.js +82 -0
  70. package/dist/upload.js.map +1 -0
  71. package/package.json +57 -0
  72. package/src/client.ts +839 -0
  73. package/src/endpoints.ts +8 -0
  74. package/src/events.ts +38 -0
  75. package/src/graphql.ts +58 -0
  76. package/src/index.ts +50 -0
  77. package/src/rtcServers.ts +38 -0
  78. package/src/sdp.ts +45 -0
  79. package/src/signaling.ts +172 -0
  80. package/src/tts.ts +364 -0
  81. package/src/types.ts +201 -0
  82. package/src/upload.ts +132 -0
package/src/tts.ts ADDED
@@ -0,0 +1,364 @@
1
+ // Text-to-speech client for the Convbased IndexTTS2 service. This path is pure
2
+ // GraphQL — it does not touch WebRTC or the signaling socket. Synthesis is
3
+ // asynchronous: you submit a job, then poll until it reaches a terminal state.
4
+ //
5
+ // Typical flow:
6
+ // const tts = new TtsClient({ apiKey });
7
+ // const { key } = await tts.uploadReferenceAudio(file); // reference voice
8
+ // const result = await tts.synthesize({ referenceKey: key, text: "你好" });
9
+ // audio.src = result.url; // presigned, ~1h
10
+
11
+ import { DEFAULT_GRAPHQL_URL } from "./endpoints.js";
12
+ import { graphqlRequest } from "./graphql.js";
13
+ import { uploadAudio } from "./upload.js";
14
+
15
/**
 * Optional emotion / sampling controls for a synthesis request.
 *
 * The object is sent unchanged as the `params` field of the `submitTts`
 * input, which is why the keys keep the service's snake_case spelling.
 * Every field may be omitted.
 */
export interface TtsParams {
  // Emotion controls.
  emo_alpha?: number;
  emo_vector?: Array<number>;
  use_emo_text?: boolean;
  emo_text?: string;

  // Sampling controls.
  temperature?: number;
  top_p?: number;
  top_k?: number;
}
25
+
26
/**
 * Lifecycle states a synthesis job can report.
 *
 * `TtsClient.synthesize()` keeps polling until it sees `"done"` and rejects
 * on `"failed"` or `"cancelled"`; any other state means "keep waiting".
 */
export type TtsJobStatus =
  // Still in flight.
  | "queued"
  | "warming"
  | "processing"
  // Terminal.
  | "done"
  | "failed"
  | "cancelled";
33
+
34
/** Outcome of a finished synthesis job, in camelCase (see `TtsJob.result`). */
export interface TtsResult {
  /** COS object key under which the synthesized audio is stored. */
  key: string;
  /** Presigned download URL for the audio (valid ~1h); `null` when unsigned. */
  url: string | null;
  /** Number of input-text tokens the charge was computed from. */
  tokenCount: number;
  /** Length of the synthesized audio, in seconds. */
  audioDurationSec: number;
  /** Amount taken from the wallet for this synthesis. */
  amountCharged: number;
  /** Wallet balance remaining after the deduction. */
  balanceAfter: number;
}
48
+
49
/** Snapshot of a synthesis job as returned by submit / getJob / cancel. */
export interface TtsJob {
  /** Identifier to pass to `getJob()` and `cancel()`. */
  jobId: string;
  /** Current lifecycle state. */
  status: TtsJobStatus;
  /** 1-based position in the queue while `queued`; 0 in every other state. */
  position: number;
  /** Synthesis outcome; set once `status === "done"`, otherwise `null`. */
  result: TtsResult | null;
  /** Error key reported by the server when `status === "failed"`. */
  error: string | null;
}
59
+
60
/**
 * Billing rule returned by `TtsClient.getPricing()`:
 * `cost = max(tokens * pricePerToken, minCharge)`.
 */
export interface TtsPricing {
  /** Price of one input-text token. */
  pricePerToken: number;
  /** Floor applied to the charge of any single synthesis. */
  minCharge: number;
}
66
+
67
/**
 * Constructor options for `TtsClient`. At least one of `apiKey` /
 * `accessToken` must be set, otherwise the constructor throws.
 */
export interface TtsClientOptions {
  /** Convbased API key. Required unless `accessToken` is provided. */
  apiKey?: string;
  /** Short-lived JWT access token (alternative to `apiKey`). */
  accessToken?: string;
  /**
   * GraphQL endpoint. Defaults to the production Convbased endpoint
   * (`https://api.weights.chat/api/v1/graphql`). Override for self-hosted
   * deployments.
   */
  graphqlUrl?: string;
  /**
   * Optional logger. Methods that are not supplied fall back to a no-op for
   * `debug`/`info` and to `console` for `warn`/`error`.
   */
  logger?: Partial<Pick<Console, "debug" | "info" | "warn" | "error">>;
}
81
+
82
/** Input for `TtsClient.submit()`. */
export interface SubmitTtsOptions {
  /** COS key of a reference voice that was already uploaded (see `uploadReferenceAudio`). */
  referenceKey: string;
  /** The text to turn into speech. */
  text: string;
  /** Optional emotion / sampling controls; sent as `null` when omitted. */
  params?: TtsParams;
}
90
+
91
/**
 * Input for the one-call `TtsClient.synthesize()` helper. Exactly how the
 * reference voice is supplied is up to the caller: either an existing
 * `referenceKey` or a `referenceAudio` blob that is uploaded first.
 */
export interface SynthesizeOptions {
  /** COS key of an already-uploaded reference voice. Provide this or `referenceAudio`. */
  referenceKey?: string;
  /** A reference-voice `Blob`/`File` to upload first. Provide this or `referenceKey`. */
  referenceAudio?: Blob;
  /** The text to turn into speech. */
  text: string;
  /** Optional emotion / sampling controls. */
  params?: TtsParams;

  /** Delay between status polls, in ms. Default 1500. */
  pollIntervalMs?: number;
  /** Stop waiting after this many ms. Default 300_000 (5 min). */
  timeoutMs?: number;
  /** Aborts the wait (and cancels the job if it is still queued). */
  signal?: AbortSignal;
  /** Invoked with the submitted job and with every polled snapshot (queue position, status…). */
  onJob?: (job: TtsJob) => void;
}
109
+
110
/**
 * Job payload exactly as the GraphQL service returns it (snake_case). It is
 * the shape selected by `JOB_FIELDS` and is converted to the public `TtsJob`
 * by `toJob()`.
 */
interface TtsJobWire {
  job_id: string;
  status: TtsJobStatus;
  position: number;
  /** `null` when the job carries no result. */
  result: null | {
    key: string;
    url: string | null;
    token_count: number;
    audio_duration_sec: number;
    amount_charged: number;
    balance_after: number;
  };
  error: string | null;
}
124
+
125
/**
 * GraphQL selection set shared by every operation that returns a job
 * (`submitTts`, `ttsJob`, `cancelTtsJob`). It is interpolated into those
 * documents, so it must stay in sync with `TtsJobWire`.
 */
const JOB_FIELDS = /* GraphQL */ `
  job_id
  status
  position
  result {
    key
    url
    token_count
    audio_duration_sec
    amount_charged
    balance_after
  }
  error
`;
139
+
140
/**
 * Client for the asynchronous text-to-speech service. Every call is a plain
 * GraphQL request made through `graphqlRequest`; synthesis is a submit-then-
 * poll workflow which `synthesize()` wraps into a single call.
 */
export class TtsClient {
  private readonly graphqlUrl: string;
  private readonly apiKey?: string;
  private readonly accessToken?: string;
  private readonly logger: Pick<Console, "debug" | "info" | "warn" | "error">;

  /**
   * @param options Credentials, endpoint and logger overrides.
   * @throws Error when neither `apiKey` nor `accessToken` is supplied.
   */
  constructor(options: TtsClientOptions) {
    if (!options.apiKey && !options.accessToken) {
      throw new Error("TtsClient requires either `apiKey` or `accessToken`");
    }
    this.graphqlUrl = options.graphqlUrl ?? DEFAULT_GRAPHQL_URL;
    this.apiKey = options.apiKey;
    this.accessToken = options.accessToken;
    // debug/info are silent unless the caller opts in; warn/error go to console.
    const provided = options.logger ?? {};
    this.logger = {
      debug: provided.debug ?? (() => {}),
      info: provided.info ?? (() => {}),
      warn: provided.warn ?? console.warn.bind(console),
      error: provided.error ?? console.error.bind(console),
    };
  }

  /**
   * Upload a reference-voice `Blob`/`File` and resolve its COS key.
   *
   * @param file Audio to upload.
   * @param opts Optional filename / content-type overrides and abort signal.
   * @returns The object key to use as `referenceKey`.
   */
  async uploadReferenceAudio(
    file: Blob,
    opts?: { filename?: string; contentType?: string; signal?: AbortSignal }
  ): Promise<{ key: string }> {
    return uploadAudio({
      graphqlUrl: this.graphqlUrl,
      apiKey: this.apiKey,
      accessToken: this.accessToken,
      file,
      filename: opts?.filename,
      contentType: opts?.contentType,
      signal: opts?.signal,
    });
  }

  /** Current billing rule: `cost = max(tokens * pricePerToken, minCharge)`. */
  async getPricing(signal?: AbortSignal): Promise<TtsPricing> {
    const data = await graphqlRequest<{
      ttsPricing: { price_per_token: number; min_charge: number };
    }>({
      graphqlUrl: this.graphqlUrl,
      apiKey: this.apiKey,
      accessToken: this.accessToken,
      signal,
      query: /* GraphQL */ `
        query {
          ttsPricing {
            price_per_token
            min_charge
          }
        }
      `,
    });
    return {
      pricePerToken: data.ttsPricing.price_per_token,
      minCharge: data.ttsPricing.min_charge,
    };
  }

  /** Enqueue a synthesis job; resolves with the job as returned by the mutation. */
  async submit(opts: SubmitTtsOptions, signal?: AbortSignal): Promise<TtsJob> {
    const data = await graphqlRequest<{ submitTts: TtsJobWire }>({
      graphqlUrl: this.graphqlUrl,
      apiKey: this.apiKey,
      accessToken: this.accessToken,
      signal,
      query: /* GraphQL */ `
        mutation SubmitTts($input: SynthesizeTtsInput!) {
          submitTts(input: $input) {
            ${JOB_FIELDS}
          }
        }
      `,
      variables: {
        input: {
          reference_key: opts.referenceKey,
          text: opts.text,
          params: opts.params ?? null,
        },
      },
    });
    return toJob(data.submitTts);
  }

  /** Read the current status/result of a job. */
  async getJob(jobId: string, signal?: AbortSignal): Promise<TtsJob> {
    const data = await graphqlRequest<{ ttsJob: TtsJobWire }>({
      graphqlUrl: this.graphqlUrl,
      apiKey: this.apiKey,
      accessToken: this.accessToken,
      signal,
      query: /* GraphQL */ `
        query TtsJob($jobId: String!) {
          ttsJob(jobId: $jobId) {
            ${JOB_FIELDS}
          }
        }
      `,
      variables: { jobId },
    });
    return toJob(data.ttsJob);
  }

  /** Cancel a job. Only effective while it is still `queued`. */
  async cancel(jobId: string, signal?: AbortSignal): Promise<TtsJob> {
    const data = await graphqlRequest<{ cancelTtsJob: TtsJobWire }>({
      graphqlUrl: this.graphqlUrl,
      apiKey: this.apiKey,
      accessToken: this.accessToken,
      signal,
      query: /* GraphQL */ `
        mutation CancelTtsJob($jobId: String!) {
          cancelTtsJob(jobId: $jobId) {
            ${JOB_FIELDS}
          }
        }
      `,
      variables: { jobId },
    });
    return toJob(data.cancelTtsJob);
  }

  /**
   * One-call synthesis: (optionally upload the reference voice,) submit, then
   * poll until the job finishes.
   *
   * @returns The `TtsResult` of the finished job.
   * @throws Error when neither reference is given, when the job fails or is
   *   cancelled, when `timeoutMs` elapses, or when a finished job has no result.
   * @throws DOMException (`AbortError`) when `signal` aborts during the wait.
   *   In that case a best-effort `cancel()` is issued for the job. A timeout
   *   does NOT cancel the job; its id is included in the error message.
   */
  async synthesize(opts: SynthesizeOptions): Promise<TtsResult> {
    if (!opts.referenceKey && !opts.referenceAudio) {
      throw new Error(
        "synthesize() requires either `referenceKey` or `referenceAudio`"
      );
    }
    const pollIntervalMs = opts.pollIntervalMs ?? 1500;
    const timeoutMs = opts.timeoutMs ?? 300_000;
    const deadline = Date.now() + timeoutMs;

    const referenceKey = await this.resolveReferenceKey(opts);

    const submitted = await this.submit(
      { referenceKey, text: opts.text, params: opts.params },
      opts.signal
    );
    opts.onJob?.(submitted);

    let job = submitted;
    try {
      while (job.status !== "done") {
        if (opts.signal?.aborted) {
          throw new DOMException("Aborted", "AbortError");
        }
        if (job.status === "failed") {
          throw new Error(job.error || "TTS job failed");
        }
        if (job.status === "cancelled") {
          throw new Error("TTS job was cancelled");
        }
        const remainingMs = deadline - Date.now();
        if (remainingMs < 0) {
          throw new Error(
            `Timed out waiting for TTS job ${job.jobId} after ${timeoutMs}ms`
          );
        }
        // Never sleep past the deadline, so `timeoutMs` is honoured even when
        // it is shorter than (or not a multiple of) the poll interval.
        await delay(Math.min(pollIntervalMs, remainingMs), opts.signal);
        job = await this.getJob(job.jobId, opts.signal);
        opts.onJob?.(job);
      }
    } catch (err) {
      // Best-effort: stop a still-queued job so we don't pay for a result
      // nobody is waiting for. Deliberately not awaited and sent without the
      // (already aborted) signal; a failure here must not mask `err`.
      if (opts.signal?.aborted) {
        const abandonedJobId = job.jobId;
        void this.cancel(abandonedJobId).catch((cancelErr: unknown) => {
          this.logger.debug(
            `Best-effort cancel of TTS job ${abandonedJobId} failed`,
            cancelErr
          );
        });
      }
      throw err;
    }

    if (!job.result) {
      throw new Error("TTS job is done but carried no result");
    }
    return job.result;
  }

  /**
   * Pick the reference key for `synthesize()`: a non-empty `referenceKey`
   * wins; otherwise `referenceAudio` is uploaded. An empty-string key counts
   * as "not provided", matching the up-front validation in `synthesize()`.
   */
  private async resolveReferenceKey(opts: SynthesizeOptions): Promise<string> {
    if (opts.referenceKey) {
      return opts.referenceKey;
    }
    if (!opts.referenceAudio) {
      throw new Error(
        "synthesize() requires either `referenceKey` or `referenceAudio`"
      );
    }
    const uploaded = await this.uploadReferenceAudio(opts.referenceAudio, {
      signal: opts.signal,
    });
    return uploaded.key;
  }
}
328
+
329
/**
 * Convert a snake_case job payload from the service into the camelCase
 * `TtsJob` exposed by the SDK. A missing/null wire result maps to `null`.
 */
function toJob(wire: TtsJobWire): TtsJob {
  const { job_id: jobId, status, position, error } = wire;
  const raw = wire.result;

  let result: TtsJob["result"] = null;
  if (raw) {
    result = {
      key: raw.key,
      url: raw.url,
      tokenCount: raw.token_count,
      audioDurationSec: raw.audio_duration_sec,
      amountCharged: raw.amount_charged,
      balanceAfter: raw.balance_after,
    };
  }

  return { jobId, status, position, result, error };
}
347
+
348
/**
 * Abortable sleep. Resolves after `ms` milliseconds, or rejects with an
 * `AbortError` `DOMException` if `signal` is already aborted or aborts while
 * waiting. The timer and the abort listener are each cleaned up by whichever
 * of the two fires first.
 */
function delay(ms: number, signal?: AbortSignal): Promise<void> {
  if (signal?.aborted) {
    return Promise.reject(new DOMException("Aborted", "AbortError"));
  }
  return new Promise<void>((resolve, reject) => {
    function handleAbort(): void {
      clearTimeout(handle);
      reject(new DOMException("Aborted", "AbortError"));
    }
    const handle = setTimeout(() => {
      signal?.removeEventListener("abort", handleAbort);
      resolve();
    }, ms);
    signal?.addEventListener("abort", handleAbort, { once: true });
  });
}
package/src/types.ts ADDED
@@ -0,0 +1,201 @@
1
+ // Wire-protocol types mirrored from ServerAPI / signaling.
2
+
3
/**
 * One ICE server entry: a list of server URLs plus optional credentials.
 * Used as the element type of `ConvbasedClientOptions.iceServers`.
 */
export interface RTCServersConfig {
  urls: Array<string>;
  username?: string;
  credential?: string;
}
8
+
9
/**
 * Preferences carried by the `offer` and `config` signaling messages (see
 * `OutgoingMessage`). Only `model_id` and `sample_rate` are mandatory; the
 * index signature lets callers pass additional keys not listed here.
 */
export interface RTCPreferences {
  // Required.
  model_id: string;
  sample_rate: number;

  // Optional tuning values.
  pitch?: number;
  rms_mix_rate?: number;
  f0_threshold?: number;
  block_time?: number;
  crossfade_time?: number;
  extra_time?: number;
  f0_autotune?: boolean;
  f0_autotune_strength?: number;
  proposed_pitch?: boolean;
  proposed_pitch_threshold?: number;
  enable_limiter?: boolean;
  limiter_threshold?: number;
  enable_lookahead?: boolean;
  lookahead_time?: number;
  formant?: number;
  index_rate?: number;
  protect?: number;
  threshold?: number;

  // Escape hatch for keys this interface does not enumerate.
  [key: string]: unknown;
}
32
+
33
/**
 * Per-task parameters for offline file inference (voice-to-voice). They are
 * sent as the optional `preferences` of a `task_start` message and are
 * separate from the live `RTCPreferences`: this shape additionally exposes
 * `f0_method` and `use_pv`, and makes every field optional.
 */
export interface FileInferencePreferences {
  pitch?: number;
  f0_method?: "rmvpe" | "fcpe";
  f0_threshold?: number;
  index_rate?: number;
  protect?: number;
  f0_autotune?: boolean;
  f0_autotune_strength?: number;
  proposed_pitch?: boolean;
  proposed_pitch_threshold?: number;
  sample_rate?: number;
  formant?: number;
  block_time?: number;
  crossfade_time?: number;
  extra_time?: number;
  use_pv?: boolean;
  rms_mix_rate?: number;
  threshold?: number;
  enable_limiter?: boolean;
  limiter_threshold?: number;
  enable_lookahead?: boolean;
  lookahead_time?: number;

  // Escape hatch for keys this interface does not enumerate.
  [key: string]: unknown;
}
62
+
63
/**
 * Messages the client may send over the signaling channel, discriminated by
 * the `type` tag.
 */
export type OutgoingMessage =
  // Session negotiation and live reconfiguration.
  | { type: "offer"; sdp?: string; preferences: RTCPreferences }
  | { type: "ice_candidate"; candidate: RTCIceCandidateInit }
  | { type: "config"; preferences: Partial<RTCPreferences> }
  // File-inference task control.
  | {
      type: "task_start";
      task_id: string;
      audio_key: string;
      generate_name?: string;
      format?: string;
      preferences?: FileInferencePreferences;
    }
  | { type: "task_stop"; task_id?: string }
  // Session teardown and keep-alive.
  | { type: "exit" }
  | { type: "ping" }
  | { type: "pong" };
79
+
80
/**
 * Numeric status codes used by the signaling protocol. The member names are
 * the only description available here; the values are grouped by their
 * thousands range (2xxx, 3xxx, 4000, 5000).
 */
export enum RTCStatusCode {
  // 2xxx
  ERROR = 2000,
  GPU_INSUFFICIENT = 2001,
  DUPLICATE_CONNECTION = 2002,
  MODEL_NOT_FOUND = 2003,
  UNPAID_SERVICE = 2004,
  REQUEST_TOO_FAST = 2005,

  // 3000-3009
  CONNECTED = 3000,
  REQUEST_RECEIVED = 3001,
  TRACK_READY = 3002,
  RESPONSE_SENT = 3003,
  LOADING_MODEL = 3004,
  SERVICE_READY = 3009,

  // 3010-3012: file inference (voice-to-voice) task lifecycle.
  TASK_PROGRESS = 3010,
  TASK_FINISHED = 3011,
  TASK_ACK = 3012,

  // 4000 / 5000
  SHUTDOWN = 4000,
  SERVER_CLOSED = 5000,
}
103
+
104
/** Final outcome reported in a `task_finished` message (see `IncomingMessage`). */
export type TaskStatus =
  | "success"
  | "failure"
  | "cancelled";
105
+
106
/**
 * Messages the client may receive over the signaling channel. The named
 * variants are tagged by `type`; the trailing `Record<string, unknown>`
 * member accepts any other object, so consumers must check fields at runtime
 * rather than rely on exhaustive narrowing.
 */
export type IncomingMessage =
  // Generic status / error notifications.
  | {
      type: "message" | "shutdown" | "error";
      message?: string;
      code?: number;
    }
  // Session negotiation.
  | { type: "answer"; sdp: string }
  | { type: "ice_candidate"; candidate: RTCIceCandidateInit }
  // File-inference task lifecycle.
  | {
      type: "task_ack";
      task_id: string;
      status: "queued" | "started";
      queue_position?: number;
      code?: number;
    }
  | { type: "task_progress"; task_id: string; progress: number; code?: number }
  | {
      type: "task_finished";
      task_id: string;
      status: TaskStatus;
      result_key?: string;
      download_url?: string;
      error?: string;
      code?: number;
    }
  // Keep-alive.
  | { type: "ping" }
  | { type: "pong" }
  // Anything else.
  | Record<string, unknown>;
134
+
135
/** Names of the connection states exposed by the client. */
export type ConnectionState =
  | "idle"
  | "signaling"
  | "negotiating"
  | "connecting"
  | "connected"
  | "closing"
  | "closed"
  // Failure state.
  | "error";
144
+
145
/** Constructor options for the real-time (WebRTC) Convbased client. */
export interface ConvbasedClientOptions {
  // --- Authentication -----------------------------------------------------
  /** Convbased API key issued in the Web console. Required unless `accessToken` is provided. */
  apiKey?: string;
  /** Optional JWT access token; takes a back seat to `apiKey` if both are provided. */
  accessToken?: string;

  // --- Endpoints ----------------------------------------------------------
  /**
   * Signaling WebSocket URL. Defaults to the production Convbased endpoint
   * (`wss://api.weights.chat/api/signaling/ws`). Override only for
   * self-hosted deployments. URLs ending in `/ws` are used as-is; bare
   * hosts get `/signaling/ws` appended.
   */
  signalingUrl?: string;
  /**
   * GraphQL endpoint used to fetch TURN credentials. Defaults to the
   * production Convbased endpoint (`https://api.weights.chat/api/v1/graphql`).
   * Pass `false` to disable the auto-fetch entirely (will fall back to
   * `iceServers` if provided, else public STUN).
   */
  graphqlUrl?: string | false;

  // --- ICE / media --------------------------------------------------------
  /** Statically-configured ICE servers. If omitted and `graphqlUrl` is set, the SDK fetches `rtcServers`. */
  iceServers?: RTCServersConfig[];
  /** `relay` forces TURN-only — useful when STUN is blocked. */
  iceTransportPolicy?: RTCIceTransportPolicy;
  /** Opus bitrate in kbps. Default 64. */
  bitrate?: number;
  /** Send stereo. Default false. */
  stereo?: boolean;

  // --- Timeouts -----------------------------------------------------------
  /** How long to wait for `SERVICE_READY` after sending the offer, in ms. Default 120_000. */
  signalingTimeoutMs?: number;
  /** How long to wait for the initial WebSocket open, in ms. Default 20_000. */
  connectTimeoutMs?: number;

  // --- Diagnostics --------------------------------------------------------
  /** Optional logger; defaults to `console` for warn/error only. */
  logger?: Partial<Pick<Console, "debug" | "info" | "warn" | "error">>;
}
179
+
180
/** Per-connection options supplied when opening a real-time session. */
export interface ConnectOptions {
  /** ID of the model the inference node should load. Required. */
  modelId: string;
  /** Microphone constraints, or an existing `MediaStream` to skip getUserMedia entirely. */
  audio?: MediaStream | MediaTrackConstraints | boolean;
  /**
   * Additional RVC preferences forwarded to the node. `model_id` and
   * `sample_rate` are excluded here because `modelId` / `sampleRate` carry them.
   */
  preferences?: Partial<Omit<RTCPreferences, "model_id" | "sample_rate">>;
  /** Sample rate to advertise to the node. Defaults to the AudioContext's `sampleRate`, falling back to 48000. */
  sampleRate?: number;
}
190
+
191
/** Connection-quality numbers reported for an active session. */
export interface ConnectionStats {
  /** Round-trip time, in milliseconds (per the field name). */
  rttMs: number;
  /** Jitter value; unit is not stated in this file. */
  jitter: number;
  /** Count of lost packets. */
  packetsLost: number;
}
196
+
197
/**
 * Event payload describing a message received from the server: the optional
 * `code` / `message` fields alongside the untouched incoming message.
 */
export interface ServerMessageEvent {
  /** Numeric status code, when one is available. */
  code?: number;
  /** Human-readable text, when one is available. */
  message?: string;
  /** The incoming message this event was built from. */
  raw: IncomingMessage;
}
package/src/upload.ts ADDED
@@ -0,0 +1,132 @@
1
+ // Audio upload helpers shared by TTS (reference voice) and file inference
2
+ // (source audio). Two steps mirror Convbased-Web: ask the GraphQL service for
3
+ // a presigned PUT (`requestAudioUpload`), then PUT the bytes straight to object
4
+ // storage. The returned COS `key` is what you hand to `submitTts` /
5
+ // `startTask`.
6
+
7
+ import { graphqlRequest, type GraphQLAuth } from "./graphql.js";
8
+
9
/** A single HTTP header as a name/value pair (element of `PresignedUpload.headers`). */
export interface HeaderKV {
  /** Header name, e.g. `Content-Type`. */
  name: string;
  /** Header value. */
  value: string;
}
13
+
14
/**
 * Upload target returned by the `requestAudioUpload` mutation. Field names
 * are snake_case because the object is returned to callers exactly as the
 * service sent it.
 */
export interface PresignedUpload {
  /** COS object key — hand this back to the service (e.g. as `reference_key` / `audio_key`). */
  key: string;
  /** Presigned URL the bytes are sent to. */
  upload_url: string;
  /** HTTP method for the upload (always `PUT` today). */
  method: string;
  /** Headers that must accompany the upload request (notably `Content-Type`). */
  headers: Array<HeaderKV>;
  /** Lifetime of the presigned URL, in seconds. */
  expires_in: number;
  bucket?: string | null;
  region?: string | null;
  /** CDN base URL of the bucket, when one is configured. */
  url?: string | null;
}
30
+
31
/**
 * GraphQL mutation that asks the service for a presigned upload target. The
 * selection mirrors the fields of `PresignedUpload`.
 */
const REQUEST_AUDIO_UPLOAD = /* GraphQL */ `
  mutation RequestAudioUpload($input: RequestUploadInput!) {
    requestAudioUpload(input: $input) {
      key
      upload_url
      method
      expires_in
      headers {
        name
        value
      }
      bucket
      region
      url
    }
  }
`;
48
+
49
/**
 * Ask the service for a presigned upload target for an audio file.
 *
 * @param args Credentials and endpoint, plus the file's name, MIME type and
 *   byte size (sent as `filename`, `content_type`, `size`), and an optional
 *   abort signal.
 * @returns The `requestAudioUpload` payload exactly as the service returned it.
 */
export async function requestAudioUpload(
  args: GraphQLAuth & {
    graphqlUrl: string;
    filename: string;
    contentType: string;
    size: number;
    signal?: AbortSignal;
  }
): Promise<PresignedUpload> {
  const { graphqlUrl, apiKey, accessToken, signal, filename, contentType, size } = args;
  const input = { filename, content_type: contentType, size };

  const response = await graphqlRequest<{ requestAudioUpload: PresignedUpload }>({
    graphqlUrl,
    apiKey,
    accessToken,
    signal,
    query: REQUEST_AUDIO_UPLOAD,
    variables: { input },
  });
  return response.requestAudioUpload;
}
75
+
76
/**
 * Send raw bytes to a presigned upload target.
 *
 * @param presigned Target from `requestAudioUpload`; its `headers` are sent
 *   as-is and its `method` is used, defaulting to `PUT` when empty.
 * @param body Bytes to upload.
 * @param signal Optional abort signal passed to `fetch`.
 * @throws Error with the HTTP status when the response is not `ok`.
 */
export async function putToPresigned(
  presigned: PresignedUpload,
  body: Blob | ArrayBuffer | ArrayBufferView,
  signal?: AbortSignal
): Promise<void> {
  // Flatten the name/value list into a plain header map, skipping entries
  // without a name; a later duplicate overrides an earlier one.
  const headers: Record<string, string> = {};
  (presigned.headers ?? []).forEach((entry) => {
    if (!entry?.name) return;
    headers[entry.name] = entry.value;
  });

  const method = presigned.method || "PUT";
  const response = await fetch(presigned.upload_url, {
    method,
    headers,
    body: body as BodyInit,
    signal,
  });

  if (response.ok) {
    return;
  }
  throw new Error(
    `Audio upload failed: HTTP ${response.status} ${response.statusText}`
  );
}
98
+
99
/**
 * Upload an audio `Blob`/`File` end-to-end (presign + PUT) and resolve the COS
 * `key`.
 *
 * Filename: `args.filename`, else the `File`'s own name, else `"audio.wav"`.
 * Content type: `args.contentType`, else the blob's `type`, else
 * `"application/octet-stream"`. Empty strings count as "not provided" for
 * both, so a bare `Blob` or an unnamed `File` still yields usable values.
 *
 * @param args Credentials, endpoint, the file, optional overrides and an
 *   optional abort signal (applied to both the presign request and the PUT).
 * @returns The object key reported by the presign step.
 */
export async function uploadAudio(
  args: GraphQLAuth & {
    graphqlUrl: string;
    file: Blob;
    filename?: string;
    contentType?: string;
    signal?: AbortSignal;
  }
): Promise<{ key: string }> {
  const { file, signal } = args;

  // Only `File` carries a `name`; probe for it at runtime instead of casting
  // a plain `Blob` to `File`.
  const nameFromFile =
    "name" in file && typeof file.name === "string" ? file.name : undefined;
  const filename = args.filename || nameFromFile || "audio.wav";
  const contentType =
    args.contentType || file.type || "application/octet-stream";

  const presigned = await requestAudioUpload({
    graphqlUrl: args.graphqlUrl,
    apiKey: args.apiKey,
    accessToken: args.accessToken,
    signal,
    filename,
    contentType,
    size: file.size,
  });

  await putToPresigned(presigned, file, signal);
  return { key: presigned.key };
}