@upliftai/sdk-js 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,304 @@
1
+ import * as node_stream from 'node:stream';
2
+
3
/** Configuration for the SDK's internal HTTP client. */
interface HttpClientOptions {
    /** Root URL that all request paths are resolved against. */
    baseUrl: string;
    /** API key attached to every request (header construction lives in the implementation). */
    apiKey: string;
    /** Per-request timeout — presumably milliseconds; confirm against the implementation. */
    timeout?: number;
    /** Maximum retry attempts for failed requests (see `fetchWithRetry`). */
    maxRetries?: number;
}
/**
 * Thin HTTP wrapper shared by the resource classes (`TTS`, `STT`,
 * `PhraseReplacements`).
 *
 * Every public method resolves with the parsed payload **plus** the raw
 * response `Headers`, so callers can read server metadata (content type,
 * sample rate, etc.) alongside the body.
 */
declare class HttpClient {
    private baseUrl;
    private apiKey;
    private timeout;
    private maxRetries;
    constructor(options: HttpClientOptions);
    // Builds default request headers (auth etc.) — body not visible in this declaration file.
    private headers;
    // fetch() with the retry policy applied; retry count bounded by `maxRetries`.
    private fetchWithRetry;
    // Computes the wait before the next retry attempt — backoff strategy not visible here.
    private retryDelay;
    /** POST a JSON body to `path` and parse the JSON response as `T`. */
    postJSON<T>(path: string, body: Record<string, unknown>): Promise<{
        data: T;
        headers: Headers;
    }>;
    /** POST a JSON body and return the raw response bytes as a Node `Buffer`. */
    postJSONForBuffer(path: string, body: Record<string, unknown>): Promise<{
        buffer: Buffer;
        headers: Headers;
    }>;
    /** POST a JSON body and return the response body as a web `ReadableStream` (unbuffered). */
    postJSONForStream(path: string, body: Record<string, unknown>): Promise<{
        body: ReadableStream<Uint8Array>;
        headers: Headers;
    }>;
    /** POST multipart form data (file uploads) and parse the JSON response as `T`. */
    postMultipart<T>(path: string, formData: FormData): Promise<{
        data: T;
        headers: Headers;
    }>;
    /** GET `path` and parse the JSON response as `T`. */
    get<T>(path: string): Promise<{
        data: T;
        headers: Headers;
    }>;
    /** GET `path` (with optional query parameters) and return the response body as a stream. */
    getStream(path: string, query?: Record<string, string>): Promise<{
        body: ReadableStream<Uint8Array>;
        headers: Headers;
    }>;
    // NOTE(review): presumably maps non-2xx responses to the UpliftAIError
    // hierarchy declared below — confirm in the implementation.
    private throwForStatus;
    // Reads the response text without throwing on failure (for error messages).
    private safeText;
}
/**
 * Supported audio output encodings. Values appear to follow a
 * `CODEC_SAMPLERATE_BITDEPTH-or-BITRATE` naming scheme (e.g. `MP3_22050_64`
 * ≈ MP3 at 22050 Hz, 64 kbps) — naming inferred from the literals; confirm
 * against the API documentation.
 */
type OutputFormat = 'PCM_22050_16' | 'WAV_22050_16' | 'WAV_22050_32' | 'MP3_22050_32' | 'MP3_22050_64' | 'MP3_22050_128' | 'OGG_22050_16' | 'ULAW_8000_8';
/** Parameters for a text-to-speech synthesis request. */
interface TTSRequest {
    /** Text to synthesize. */
    text: string;
    /** Identifier of the voice to use (e.g. `'v_meklc281'`, per the examples below). */
    voiceId: string;
    /** Desired audio encoding — server-side default applies when omitted; TODO confirm which. */
    outputFormat?: OutputFormat;
    /** Optional pronunciation-control config id (see `PhraseReplacements`). */
    phraseReplacementConfigId?: string;
}
/** Audio properties reported by the server, parsed from response headers. */
interface AudioMetadata {
    /** Server-assigned id for this synthesis request. */
    requestId: string;
    /** Audio duration — presumably seconds; confirm against the API docs. */
    duration: number;
    /** MIME type of the audio payload (e.g. `'audio/mpeg'`, per the `retrieve` example). */
    contentType: string;
    /** Sample rate in Hz. */
    sampleRate: number;
    /** Bit rate — units not visible here; presumably kbps, matching `OutputFormat` names. */
    bitRate: number;
}
/** Fully-buffered synthesis result, returned by `TTS.create()`. */
interface AudioResponse {
    /** Complete encoded audio. */
    audio: Buffer;
    metadata: AudioMetadata;
}
/** Streaming synthesis result, returned by `TTS.createStream()` and `TTS.retrieve()`. */
interface StreamResponse {
    /** Node readable stream of encoded audio chunks. */
    stream: node_stream.Readable;
    metadata: AudioMetadata;
}
/**
 * Result of enqueuing a TTS job. Use `mediaId` with `retrieve()` to fetch
 * the audio, or pass `temporaryUrl` directly to a frontend/client (e.g.
 * WhatsApp, browser audio element) without downloading first.
 */
interface EnqueueResponse {
    /** Identifier of the enqueued media — pass to `TTS.retrieve()`. */
    mediaId: string;
    // NOTE(review): purpose inferred from the name — looks like an access
    // token tied to this media item; confirm in the implementation.
    token: string;
    /** Pre-signed URL to stream audio directly — no auth required. Short-lived, do not persist. */
    temporaryUrl: string;
}
/** WebSocket event: synthesis for a request has started. */
interface WSAudioStart {
    type: 'audio_start';
    /** Stream this event belongs to (streams are multiplexed by requestId). */
    requestId: string;
    timestamp: number;
}
/** WebSocket event: one chunk of synthesized audio. */
interface WSAudio {
    type: 'audio';
    requestId: string;
    /** Ordinal of this chunk within the stream — presumably for ordering; confirm whether gaps are possible. */
    sequence: number;
    /** Encoded audio chunk. */
    audio: Buffer;
}
/** WebSocket event: synthesis for a request has completed. */
interface WSAudioEnd {
    type: 'audio_end';
    requestId: string;
    timestamp: number;
}
/** WebSocket event: synthesis for a request failed. */
interface WSError {
    type: 'error';
    requestId: string;
    /** Machine-readable error code. */
    code: string;
    /** Human-readable error description. */
    message: string;
}
/** Discriminated union of WebSocket TTS events — switch on `type`. */
type TTSStreamEvent = WSAudioStart | WSAudio | WSAudioEnd | WSError;
/** Options shared by both transcription input shapes. */
interface TranscriptionRequestBase {
    /** Transcription model — server default applies when omitted; TODO confirm which. */
    model?: 'scribe' | 'scribe-mini';
    /** Audio language. Only Urdu (`'ur'`) is declared here. */
    language?: 'ur';
    /** Domain hint to bias transcription vocabulary. */
    domain?: 'phone-commerce' | 'farming';
}
/** Transcribe from a file on disk. */
interface TranscriptionRequestFromPath extends TranscriptionRequestBase {
    /** Path to an audio file. Extension is used for content-type detection. */
    file: string;
    // `never` forbids fileName here, making the two request shapes mutually exclusive.
    fileName?: never;
}
/** Transcribe from in-memory data or a stream. */
interface TranscriptionRequestFromBuffer extends TranscriptionRequestBase {
    /** Audio data as a Buffer or readable stream. */
    file: Buffer | NodeJS.ReadableStream;
    /**
     * Filename hint for content-type detection on the server (e.g. `'call.mp3'`).
     * The extension tells the server what format the audio is in.
     */
    fileName: string;
}
/** Union of the two transcription input shapes — discriminated by the type of `file`. */
type TranscriptionRequest = TranscriptionRequestFromPath | TranscriptionRequestFromBuffer;
/** Result of a transcription request. */
interface TranscriptionResponse {
    /** Transcribed text. */
    transcript: string;
}
/** A single pronunciation-control substitution rule. */
interface PhraseReplacement {
    /** Phrase to match in the input text. */
    phrase: string;
    /** Text spoken in its place. */
    replacement: string;
}
/** A stored, server-side collection of replacement rules, referenced by id from `TTSRequest`. */
interface PhraseReplacementConfig {
    /** Server-assigned id — use as `TTSRequest.phraseReplacementConfigId`. */
    configId: string;
    phraseReplacements: PhraseReplacement[];
}
/** Options for constructing the top-level `UpliftAI` client. */
interface UpliftAIOptions {
    /** API key — optional here, so presumably falls back to an environment variable; confirm. */
    apiKey?: string;
    /** Override the default API base URL (e.g. for testing or proxies). */
    baseUrl?: string;
    /** Per-request timeout passed through to the HTTP client — presumably milliseconds. */
    timeout?: number;
    /** Maximum retry attempts passed through to the HTTP client. */
    maxRetries?: number;
}
/**
 * One synthesis stream on a WebSocket connection. Async-iterate it to
 * receive `TTSStreamEvent`s for this request only.
 */
interface TTSStream extends AsyncIterable<TTSStreamEvent> {
    /** Cancel this stream; resolves once cancellation is processed. */
    cancel(): Promise<void>;
    /** Id that multiplexes this stream's events on the shared connection. */
    requestId: string;
}
/** Connection lifecycle states, mirroring the standard WebSocket ready states. */
type WSReadyState = 'connecting' | 'open' | 'closing' | 'closed';
/** Persistent WebSocket connection for low-latency TTS. Obtain via `TTS.connect()`. */
interface TTSWebSocket {
    /** Start a new synthesis stream; an explicit `requestId` may be supplied, otherwise one is assigned. */
    stream(request: TTSRequest & {
        requestId?: string;
    }): TTSStream;
    /** Cancel every in-flight stream on this connection. */
    cancelAll(): void;
    /** Number of streams currently in flight. */
    readonly activeStreams: number;
    /** Close the underlying connection. */
    close(): void;
    readonly readyState: WSReadyState;
    /** Server-assigned id for this connection/session. */
    readonly sessionId: string;
    /** Connection-level errors (not per-stream errors — those arrive as `WSError` events). */
    on(event: 'error', listener: (error: Error) => void): this;
    /** Connection closed; args mirror the WebSocket close code/reason. */
    on(event: 'close', listener: (code: number, reason: string) => void): this;
}
/**
 * CRUD resource for phrase replacement configs (pronunciation control).
 * Access via `client.tts.phraseReplacements`.
 */
declare class PhraseReplacements {
    private http;
    constructor(http: HttpClient);
    /** Create a new config from a list of rules; the server assigns the `configId`. */
    create(replacements: PhraseReplacement[]): Promise<PhraseReplacementConfig>;
    /** Fetch one config by id. */
    get(configId: string): Promise<PhraseReplacementConfig>;
    /** List all configs for this account. */
    list(): Promise<PhraseReplacementConfig[]>;
    /** Replace the rules of an existing config. */
    update(configId: string, replacements: PhraseReplacement[]): Promise<PhraseReplacementConfig>;
}
+ /** Text-to-speech resource. Access via `client.tts`. */
167
+ declare class TTS {
168
+ private http;
169
+ private apiKey;
170
+ private baseUrl;
171
+ private wsBaseUrl;
172
+ /** Manage phrase replacement configs for pronunciation control. */
173
+ readonly phraseReplacements: PhraseReplacements;
174
+ constructor(http: HttpClient, apiKey: string, baseUrl: string, wsBaseUrl: string);
175
+ /**
176
+ * Synthesize text and return the full audio buffer.
177
+ *
178
+ * Generates the complete audio before returning. Faster end-to-end than
179
+ * streaming, but the caller must wait for the entire file. Best for
180
+ * batch/offline use cases where latency to first byte doesn't matter.
181
+ *
182
+ * @example
183
+ * const { audio, metadata } = await client.tts.create({ text: 'سلام', voiceId: 'v_meklc281' });
184
+ * fs.writeFileSync('output.mp3', audio);
185
+ */
186
+ create(request: TTSRequest): Promise<AudioResponse>;
187
+ /**
188
+ * Synthesize text and return a readable stream of audio chunks.
189
+ *
190
+ * The first chunk arrives quickly, but total generation is slower than
191
+ * `create()`. Use this in latency-sensitive environments like live agents,
192
+ * phone calls, or real-time playback where you want audio to start playing
193
+ * immediately rather than waiting for the full file.
194
+ *
195
+ * @example
196
+ * const { stream, metadata } = await client.tts.createStream({ text: 'سلام', voiceId: 'v_meklc281' });
197
+ * for await (const chunk of stream) speaker.write(chunk);
198
+ */
199
+ createStream(request: TTSRequest): Promise<StreamResponse>;
200
+ /**
201
+ * Enqueue an async TTS job. Returns a `mediaId` to retrieve the audio later.
202
+ *
203
+ * Use for batch processing or when you don't need audio immediately.
204
+ * Poll or call `retrieve(mediaId)` when the audio is ready.
205
+ *
206
+ * @example
207
+ * const { mediaId, temporaryUrl } = await client.tts.enqueue({ text: 'سلام', voiceId: 'v_meklc281' });
208
+ * // retrieve server-side
209
+ * const audio = await client.tts.retrieve(mediaId);
210
+ * // or pass URL directly to a client/browser
211
+ * console.log(temporaryUrl);
212
+ */
213
+ enqueue(request: TTSRequest): Promise<EnqueueResponse>;
214
+ /**
215
+ * Enqueue an async TTS job with streaming retrieval.
216
+ *
217
+ * Same as `enqueue()`, but when retrieved via `retrieve(mediaId)` the audio
218
+ * streams in chunks instead of arriving as a single buffer.
219
+ *
220
+ * @example
221
+ * const { mediaId, temporaryUrl } = await client.tts.enqueueStream({ text: 'سلام', voiceId: 'v_meklc281' });
222
+ * const stream = await client.tts.retrieve(mediaId);
223
+ * for await (const chunk of stream) speaker.write(chunk);
224
+ */
225
+ enqueueStream(request: TTSRequest): Promise<EnqueueResponse>;
226
+ /**
227
+ * Retrieve audio from a previously enqueued job.
228
+ *
229
+ * Returns the audio stream along with metadata (encoding, sample rate, etc.)
230
+ * from response headers.
231
+ *
232
+ * @example
233
+ * const { stream, metadata } = await client.tts.retrieve('<mediaId from enqueue>');
234
+ * console.log(metadata.contentType); // 'audio/mpeg'
235
+ * for await (const chunk of stream) fs.appendFileSync('out.mp3', chunk);
236
+ */
237
+ retrieve(mediaId: string): Promise<StreamResponse>;
238
+ /**
239
+ * Open a persistent WebSocket connection for low-latency streaming TTS.
240
+ *
241
+ * Supports multiple concurrent streams on one connection, multiplexed by
242
+ * requestId. Use for real-time conversational AI, live agents, and
243
+ * interactive use cases. Resolves once the connection is ready.
244
+ *
245
+ * Open one connection per conversation or user session — don't share across
246
+ * unrelated contexts.
247
+ *
248
+ * @example
249
+ * const ws = await client.tts.connect();
250
+ * // Stream sentence-by-sentence as your LLM generates
251
+ * for await (const sentence of llm.streamSentences(prompt)) {
252
+ * const stream = ws.stream({ text: sentence, voiceId: 'v_meklc281' });
253
+ * for await (const event of stream) {
254
+ * if (event.type === 'audio') speaker.write(event.audio);
255
+ * }
256
+ * }
257
+ * ws.close();
258
+ */
259
+ private buildTemporaryUrl;
260
+ connect(): Promise<TTSWebSocket>;
261
+ }
262
+
263
/** Speech-to-text resource. Access via `client.stt`. */
declare class STT {
    private http;
    constructor(http: HttpClient);
    /**
     * Transcribe audio to text.
     *
     * Accepts a file path, Buffer, or readable stream as input.
     *
     * @example
     * // From file path (extension used for content-type detection)
     * const { transcript } = await client.stt.transcribe({ file: './call.mp3', model: 'scribe' });
     *
     * // From Buffer (pass fileName so the server knows the format)
     * const { transcript } = await client.stt.transcribe({ file: audioBuffer, fileName: 'call.mp3', language: 'ur' });
     */
    transcribe(request: TranscriptionRequest): Promise<TranscriptionResponse>;
}
/**
 * UpliftAI API client — the SDK's entry point (also the default export).
 *
 * @example
 * const client = new UpliftAI({ apiKey: '...' });
 * const { audio } = await client.tts.create({ text: 'سلام', voiceId: 'v_meklc281' });
 */
declare class UpliftAI {
    /** Text-to-speech operations. */
    readonly tts: TTS;
    /** Speech-to-text operations. */
    readonly stt: STT;
    constructor(options?: UpliftAIOptions);
}
/**
 * Base class for all errors thrown by this SDK. Catch this to handle any
 * API failure; the subclasses below distinguish specific conditions.
 */
declare class UpliftAIError extends Error {
    /** HTTP status of the failed response, when one was received. */
    readonly statusCode?: number | undefined;
    /** Machine-readable error code from the API, when provided. */
    readonly code?: string | undefined;
    /** Server request id — include when reporting issues. */
    readonly requestId?: string | undefined;
    constructor(message: string, statusCode?: number | undefined, code?: string | undefined, requestId?: string | undefined);
}
/** Authentication failed — presumably an invalid or missing API key; maps to 401/403 per convention, confirm in implementation. */
declare class UpliftAIAuthError extends UpliftAIError {
    constructor(message?: string, requestId?: string);
}
/** The account has insufficient balance/credits to complete the request. */
declare class UpliftAIInsufficientBalanceError extends UpliftAIError {
    constructor(message?: string, requestId?: string);
}
/** Rate limit exceeded — back off before retrying. */
declare class UpliftAIRateLimitError extends UpliftAIError {
    constructor(message?: string, requestId?: string);
}
// Public API surface: value exports (client + error classes) plus type-only
// exports; `UpliftAI` doubles as the default export.
export { type AudioMetadata, type AudioResponse, type EnqueueResponse, type OutputFormat, type PhraseReplacement, type PhraseReplacementConfig, type StreamResponse, type TTSRequest, type TTSStream, type TTSStreamEvent, type TTSWebSocket, type TranscriptionRequest, type TranscriptionRequestFromBuffer, type TranscriptionRequestFromPath, type TranscriptionResponse, UpliftAI, UpliftAIAuthError, UpliftAIError, UpliftAIInsufficientBalanceError, type UpliftAIOptions, UpliftAIRateLimitError, type WSAudio, type WSAudioEnd, type WSAudioStart, type WSError, type WSReadyState, UpliftAI as default };