@upliftai/sdk-js 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,304 @@
1
+ import * as node_stream from 'node:stream';
2
+
3
/** Configuration for the SDK's internal HTTP client. */
interface HttpClientOptions {
    /** Root URL that all request paths are resolved against. */
    baseUrl: string;
    /** API key attached to every request (header construction lives in the implementation). */
    apiKey: string;
    /** Per-request timeout — presumably milliseconds; confirm against the implementation. */
    timeout?: number;
    /** Maximum retry attempts for failed requests (see `fetchWithRetry`). */
    maxRetries?: number;
}
/**
 * Thin HTTP wrapper shared by the resource classes (`TTS`, `STT`,
 * `PhraseReplacements`).
 *
 * Every public method resolves with the parsed payload **plus** the raw
 * response `Headers`, so callers can read server metadata (content type,
 * sample rate, etc.) alongside the body.
 */
declare class HttpClient {
    private baseUrl;
    private apiKey;
    private timeout;
    private maxRetries;
    constructor(options: HttpClientOptions);
    // Builds default request headers (auth etc.) — body not visible in this declaration file.
    private headers;
    // fetch() with the retry policy applied; retry count bounded by `maxRetries`.
    private fetchWithRetry;
    // Computes the wait before the next retry attempt — backoff strategy not visible here.
    private retryDelay;
    /** POST a JSON body to `path` and parse the JSON response as `T`. */
    postJSON<T>(path: string, body: Record<string, unknown>): Promise<{
        data: T;
        headers: Headers;
    }>;
    /** POST a JSON body and return the raw response bytes as a Node `Buffer`. */
    postJSONForBuffer(path: string, body: Record<string, unknown>): Promise<{
        buffer: Buffer;
        headers: Headers;
    }>;
    /** POST a JSON body and return the response body as a web `ReadableStream` (unbuffered). */
    postJSONForStream(path: string, body: Record<string, unknown>): Promise<{
        body: ReadableStream<Uint8Array>;
        headers: Headers;
    }>;
    /** POST multipart form data (file uploads) and parse the JSON response as `T`. */
    postMultipart<T>(path: string, formData: FormData): Promise<{
        data: T;
        headers: Headers;
    }>;
    /** GET `path` and parse the JSON response as `T`. */
    get<T>(path: string): Promise<{
        data: T;
        headers: Headers;
    }>;
    /** GET `path` (with optional query parameters) and return the response body as a stream. */
    getStream(path: string, query?: Record<string, string>): Promise<{
        body: ReadableStream<Uint8Array>;
        headers: Headers;
    }>;
    // NOTE(review): presumably maps non-2xx responses to the UpliftAIError
    // hierarchy declared below — confirm in the implementation.
    private throwForStatus;
    // Reads the response text without throwing on failure (for error messages).
    private safeText;
}
/**
 * Supported audio output encodings. Values appear to follow a
 * `CODEC_SAMPLERATE_BITDEPTH-or-BITRATE` naming scheme (e.g. `MP3_22050_64`
 * ≈ MP3 at 22050 Hz, 64 kbps) — naming inferred from the literals; confirm
 * against the API documentation.
 */
type OutputFormat = 'PCM_22050_16' | 'WAV_22050_16' | 'WAV_22050_32' | 'MP3_22050_32' | 'MP3_22050_64' | 'MP3_22050_128' | 'OGG_22050_16' | 'ULAW_8000_8';
/** Parameters for a text-to-speech synthesis request. */
interface TTSRequest {
    /** Text to synthesize. */
    text: string;
    /** Identifier of the voice to use (e.g. `'v_meklc281'`, per the examples below). */
    voiceId: string;
    /** Desired audio encoding — server-side default applies when omitted; TODO confirm which. */
    outputFormat?: OutputFormat;
    /** Optional pronunciation-control config id (see `PhraseReplacements`). */
    phraseReplacementConfigId?: string;
}
/** Audio properties reported by the server, parsed from response headers. */
interface AudioMetadata {
    /** Server-assigned id for this synthesis request. */
    requestId: string;
    /** Audio duration — presumably seconds; confirm against the API docs. */
    duration: number;
    /** MIME type of the audio payload (e.g. `'audio/mpeg'`, per the `retrieve` example). */
    contentType: string;
    /** Sample rate in Hz. */
    sampleRate: number;
    /** Bit rate — units not visible here; presumably kbps, matching `OutputFormat` names. */
    bitRate: number;
}
/** Fully-buffered synthesis result, returned by `TTS.create()`. */
interface AudioResponse {
    /** Complete encoded audio. */
    audio: Buffer;
    metadata: AudioMetadata;
}
/** Streaming synthesis result, returned by `TTS.createStream()` and `TTS.retrieve()`. */
interface StreamResponse {
    /** Node readable stream of encoded audio chunks. */
    stream: node_stream.Readable;
    metadata: AudioMetadata;
}
/**
 * Result of enqueuing a TTS job. Use `mediaId` with `retrieve()` to fetch
 * the audio, or pass `temporaryUrl` directly to a frontend/client (e.g.
 * WhatsApp, browser audio element) without downloading first.
 */
interface EnqueueResponse {
    /** Identifier of the enqueued media — pass to `TTS.retrieve()`. */
    mediaId: string;
    // NOTE(review): purpose inferred from the name — looks like an access
    // token tied to this media item; confirm in the implementation.
    token: string;
    /** Pre-signed URL to stream audio directly — no auth required. Short-lived, do not persist. */
    temporaryUrl: string;
}
/** WebSocket event: synthesis for a request has started. */
interface WSAudioStart {
    type: 'audio_start';
    /** Stream this event belongs to (streams are multiplexed by requestId). */
    requestId: string;
    timestamp: number;
}
/** WebSocket event: one chunk of synthesized audio. */
interface WSAudio {
    type: 'audio';
    requestId: string;
    /** Ordinal of this chunk within the stream — presumably for ordering; confirm whether gaps are possible. */
    sequence: number;
    /** Encoded audio chunk. */
    audio: Buffer;
}
/** WebSocket event: synthesis for a request has completed. */
interface WSAudioEnd {
    type: 'audio_end';
    requestId: string;
    timestamp: number;
}
/** WebSocket event: synthesis for a request failed. */
interface WSError {
    type: 'error';
    requestId: string;
    /** Machine-readable error code. */
    code: string;
    /** Human-readable error description. */
    message: string;
}
/** Discriminated union of WebSocket TTS events — switch on `type`. */
type TTSStreamEvent = WSAudioStart | WSAudio | WSAudioEnd | WSError;
/** Options shared by both transcription input shapes. */
interface TranscriptionRequestBase {
    /** Transcription model — server default applies when omitted; TODO confirm which. */
    model?: 'scribe' | 'scribe-mini';
    /** Audio language. Only Urdu (`'ur'`) is declared here. */
    language?: 'ur';
    /** Domain hint to bias transcription vocabulary. */
    domain?: 'phone-commerce' | 'farming';
}
/** Transcribe from a file on disk. */
interface TranscriptionRequestFromPath extends TranscriptionRequestBase {
    /** Path to an audio file. Extension is used for content-type detection. */
    file: string;
    // `never` forbids fileName here, making the two request shapes mutually exclusive.
    fileName?: never;
}
/** Transcribe from in-memory data or a stream. */
interface TranscriptionRequestFromBuffer extends TranscriptionRequestBase {
    /** Audio data as a Buffer or readable stream. */
    file: Buffer | NodeJS.ReadableStream;
    /**
     * Filename hint for content-type detection on the server (e.g. `'call.mp3'`).
     * The extension tells the server what format the audio is in.
     */
    fileName: string;
}
/** Union of the two transcription input shapes — discriminated by the type of `file`. */
type TranscriptionRequest = TranscriptionRequestFromPath | TranscriptionRequestFromBuffer;
/** Result of a transcription request. */
interface TranscriptionResponse {
    /** Transcribed text. */
    transcript: string;
}
/** A single pronunciation-control substitution rule. */
interface PhraseReplacement {
    /** Phrase to match in the input text. */
    phrase: string;
    /** Text spoken in its place. */
    replacement: string;
}
/** A stored, server-side collection of replacement rules, referenced by id from `TTSRequest`. */
interface PhraseReplacementConfig {
    /** Server-assigned id — use as `TTSRequest.phraseReplacementConfigId`. */
    configId: string;
    phraseReplacements: PhraseReplacement[];
}
/** Options for constructing the top-level `UpliftAI` client. */
interface UpliftAIOptions {
    /** API key — optional here, so presumably falls back to an environment variable; confirm. */
    apiKey?: string;
    /** Override the default API base URL (e.g. for testing or proxies). */
    baseUrl?: string;
    /** Per-request timeout passed through to the HTTP client — presumably milliseconds. */
    timeout?: number;
    /** Maximum retry attempts passed through to the HTTP client. */
    maxRetries?: number;
}
/**
 * One synthesis stream on a WebSocket connection. Async-iterate it to
 * receive `TTSStreamEvent`s for this request only.
 */
interface TTSStream extends AsyncIterable<TTSStreamEvent> {
    /** Cancel this stream; resolves once cancellation is processed. */
    cancel(): Promise<void>;
    /** Id that multiplexes this stream's events on the shared connection. */
    requestId: string;
}
/** Connection lifecycle states, mirroring the standard WebSocket ready states. */
type WSReadyState = 'connecting' | 'open' | 'closing' | 'closed';
/** Persistent WebSocket connection for low-latency TTS. Obtain via `TTS.connect()`. */
interface TTSWebSocket {
    /** Start a new synthesis stream; an explicit `requestId` may be supplied, otherwise one is assigned. */
    stream(request: TTSRequest & {
        requestId?: string;
    }): TTSStream;
    /** Cancel every in-flight stream on this connection. */
    cancelAll(): void;
    /** Number of streams currently in flight. */
    readonly activeStreams: number;
    /** Close the underlying connection. */
    close(): void;
    readonly readyState: WSReadyState;
    /** Server-assigned id for this connection/session. */
    readonly sessionId: string;
    /** Connection-level errors (not per-stream errors — those arrive as `WSError` events). */
    on(event: 'error', listener: (error: Error) => void): this;
    /** Connection closed; args mirror the WebSocket close code/reason. */
    on(event: 'close', listener: (code: number, reason: string) => void): this;
}
/**
 * CRUD resource for phrase replacement configs (pronunciation control).
 * Access via `client.tts.phraseReplacements`.
 */
declare class PhraseReplacements {
    private http;
    constructor(http: HttpClient);
    /** Create a new config from a list of rules; the server assigns the `configId`. */
    create(replacements: PhraseReplacement[]): Promise<PhraseReplacementConfig>;
    /** Fetch one config by id. */
    get(configId: string): Promise<PhraseReplacementConfig>;
    /** List all configs for this account. */
    list(): Promise<PhraseReplacementConfig[]>;
    /** Replace the rules of an existing config. */
    update(configId: string, replacements: PhraseReplacement[]): Promise<PhraseReplacementConfig>;
}
+ /** Text-to-speech resource. Access via `client.tts`. */
167
+ declare class TTS {
168
+ private http;
169
+ private apiKey;
170
+ private baseUrl;
171
+ private wsBaseUrl;
172
+ /** Manage phrase replacement configs for pronunciation control. */
173
+ readonly phraseReplacements: PhraseReplacements;
174
+ constructor(http: HttpClient, apiKey: string, baseUrl: string, wsBaseUrl: string);
175
+ /**
176
+ * Synthesize text and return the full audio buffer.
177
+ *
178
+ * Generates the complete audio before returning. Faster end-to-end than
179
+ * streaming, but the caller must wait for the entire file. Best for
180
+ * batch/offline use cases where latency to first byte doesn't matter.
181
+ *
182
+ * @example
183
+ * const { audio, metadata } = await client.tts.create({ text: 'سلام', voiceId: 'v_meklc281' });
184
+ * fs.writeFileSync('output.mp3', audio);
185
+ */
186
+ create(request: TTSRequest): Promise<AudioResponse>;
187
+ /**
188
+ * Synthesize text and return a readable stream of audio chunks.
189
+ *
190
+ * The first chunk arrives quickly, but total generation is slower than
191
+ * `create()`. Use this in latency-sensitive environments like live agents,
192
+ * phone calls, or real-time playback where you want audio to start playing
193
+ * immediately rather than waiting for the full file.
194
+ *
195
+ * @example
196
+ * const { stream, metadata } = await client.tts.createStream({ text: 'سلام', voiceId: 'v_meklc281' });
197
+ * for await (const chunk of stream) speaker.write(chunk);
198
+ */
199
+ createStream(request: TTSRequest): Promise<StreamResponse>;
200
+ /**
201
+ * Enqueue an async TTS job. Returns a `mediaId` to retrieve the audio later.
202
+ *
203
+ * Use for batch processing or when you don't need audio immediately.
204
+ * Poll or call `retrieve(mediaId)` when the audio is ready.
205
+ *
206
+ * @example
207
+ * const { mediaId, temporaryUrl } = await client.tts.enqueue({ text: 'سلام', voiceId: 'v_meklc281' });
208
+ * // retrieve server-side
209
+ * const audio = await client.tts.retrieve(mediaId);
210
+ * // or pass URL directly to a client/browser
211
+ * console.log(temporaryUrl);
212
+ */
213
+ enqueue(request: TTSRequest): Promise<EnqueueResponse>;
214
+ /**
215
+ * Enqueue an async TTS job with streaming retrieval.
216
+ *
217
+ * Same as `enqueue()`, but when retrieved via `retrieve(mediaId)` the audio
218
+ * streams in chunks instead of arriving as a single buffer.
219
+ *
220
+ * @example
221
+ * const { mediaId, temporaryUrl } = await client.tts.enqueueStream({ text: 'سلام', voiceId: 'v_meklc281' });
222
+ * const stream = await client.tts.retrieve(mediaId);
223
+ * for await (const chunk of stream) speaker.write(chunk);
224
+ */
225
+ enqueueStream(request: TTSRequest): Promise<EnqueueResponse>;
226
+ /**
227
+ * Retrieve audio from a previously enqueued job.
228
+ *
229
+ * Returns the audio stream along with metadata (encoding, sample rate, etc.)
230
+ * from response headers.
231
+ *
232
+ * @example
233
+ * const { stream, metadata } = await client.tts.retrieve('<mediaId from enqueue>');
234
+ * console.log(metadata.contentType); // 'audio/mpeg'
235
+ * for await (const chunk of stream) fs.appendFileSync('out.mp3', chunk);
236
+ */
237
+ retrieve(mediaId: string): Promise<StreamResponse>;
238
+ /**
239
+ * Open a persistent WebSocket connection for low-latency streaming TTS.
240
+ *
241
+ * Supports multiple concurrent streams on one connection, multiplexed by
242
+ * requestId. Use for real-time conversational AI, live agents, and
243
+ * interactive use cases. Resolves once the connection is ready.
244
+ *
245
+ * Open one connection per conversation or user session — don't share across
246
+ * unrelated contexts.
247
+ *
248
+ * @example
249
+ * const ws = await client.tts.connect();
250
+ * // Stream sentence-by-sentence as your LLM generates
251
+ * for await (const sentence of llm.streamSentences(prompt)) {
252
+ * const stream = ws.stream({ text: sentence, voiceId: 'v_meklc281' });
253
+ * for await (const event of stream) {
254
+ * if (event.type === 'audio') speaker.write(event.audio);
255
+ * }
256
+ * }
257
+ * ws.close();
258
+ */
259
+ private buildTemporaryUrl;
260
+ connect(): Promise<TTSWebSocket>;
261
+ }
262
+
263
/** Speech-to-text resource. Access via `client.stt`. */
declare class STT {
    private http;
    constructor(http: HttpClient);
    /**
     * Transcribe audio to text.
     *
     * Accepts a file path, Buffer, or readable stream as input.
     *
     * @example
     * // From file path (extension used for content-type detection)
     * const { transcript } = await client.stt.transcribe({ file: './call.mp3', model: 'scribe' });
     *
     * // From Buffer (pass fileName so the server knows the format)
     * const { transcript } = await client.stt.transcribe({ file: audioBuffer, fileName: 'call.mp3', language: 'ur' });
     */
    transcribe(request: TranscriptionRequest): Promise<TranscriptionResponse>;
}
/**
 * UpliftAI API client — the SDK's entry point (also the default export).
 *
 * @example
 * const client = new UpliftAI({ apiKey: '...' });
 * const { audio } = await client.tts.create({ text: 'سلام', voiceId: 'v_meklc281' });
 */
declare class UpliftAI {
    /** Text-to-speech operations. */
    readonly tts: TTS;
    /** Speech-to-text operations. */
    readonly stt: STT;
    constructor(options?: UpliftAIOptions);
}
/**
 * Base class for all errors thrown by this SDK. Catch this to handle any
 * API failure; the subclasses below distinguish specific conditions.
 */
declare class UpliftAIError extends Error {
    /** HTTP status of the failed response, when one was received. */
    readonly statusCode?: number | undefined;
    /** Machine-readable error code from the API, when provided. */
    readonly code?: string | undefined;
    /** Server request id — include when reporting issues. */
    readonly requestId?: string | undefined;
    constructor(message: string, statusCode?: number | undefined, code?: string | undefined, requestId?: string | undefined);
}
/** Authentication failed — presumably an invalid or missing API key; maps to 401/403 per convention, confirm in implementation. */
declare class UpliftAIAuthError extends UpliftAIError {
    constructor(message?: string, requestId?: string);
}
/** The account has insufficient balance/credits to complete the request. */
declare class UpliftAIInsufficientBalanceError extends UpliftAIError {
    constructor(message?: string, requestId?: string);
}
/** Rate limit exceeded — back off before retrying. */
declare class UpliftAIRateLimitError extends UpliftAIError {
    constructor(message?: string, requestId?: string);
}
// Public API surface: value exports (client + error classes) plus type-only
// exports; `UpliftAI` doubles as the default export.
export { type AudioMetadata, type AudioResponse, type EnqueueResponse, type OutputFormat, type PhraseReplacement, type PhraseReplacementConfig, type StreamResponse, type TTSRequest, type TTSStream, type TTSStreamEvent, type TTSWebSocket, type TranscriptionRequest, type TranscriptionRequestFromBuffer, type TranscriptionRequestFromPath, type TranscriptionResponse, UpliftAI, UpliftAIAuthError, UpliftAIError, UpliftAIInsufficientBalanceError, type UpliftAIOptions, UpliftAIRateLimitError, type WSAudio, type WSAudioEnd, type WSAudioStart, type WSError, type WSReadyState, UpliftAI as default };