npm - @cartesia/cartesia-js - Versions diffs - 3.0.0-b5 → 3.0.0-b6 - Mend

@cartesia/cartesia-js 3.0.0-b5 → 3.0.0-b6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (52) hide show

package/CHANGELOG.md +4 -0
package/backcompat/index.d.mts +11 -31
package/backcompat/index.d.mts.map +1 -1
package/backcompat/index.d.ts +11 -31
package/backcompat/index.d.ts.map +1 -1
package/backcompat/index.js +22 -252
package/backcompat/index.js.map +1 -1
package/backcompat/index.mjs +21 -251
package/backcompat/index.mjs.map +1 -1
package/backcompat/tts-wrapper.d.mts +66 -0
package/backcompat/tts-wrapper.d.mts.map +1 -0
package/backcompat/tts-wrapper.d.ts +66 -0
package/backcompat/tts-wrapper.d.ts.map +1 -0
package/backcompat/tts-wrapper.js +260 -0
package/backcompat/tts-wrapper.js.map +1 -0
package/backcompat/tts-wrapper.mjs +254 -0
package/backcompat/tts-wrapper.mjs.map +1 -0
package/backcompat/types.d.mts +18 -0
package/backcompat/types.d.mts.map +1 -0
package/backcompat/types.d.ts +18 -0
package/backcompat/types.d.ts.map +1 -0
package/backcompat/types.js +3 -0
package/backcompat/types.js.map +1 -0
package/backcompat/types.mjs +2 -0
package/backcompat/types.mjs.map +1 -0
package/backcompat/voice-changer-wrapper.d.mts +17 -0
package/backcompat/voice-changer-wrapper.d.mts.map +1 -0
package/backcompat/voice-changer-wrapper.d.ts +17 -0
package/backcompat/voice-changer-wrapper.d.ts.map +1 -0
package/backcompat/voice-changer-wrapper.js +46 -0
package/backcompat/voice-changer-wrapper.js.map +1 -0
package/backcompat/voice-changer-wrapper.mjs +42 -0
package/backcompat/voice-changer-wrapper.mjs.map +1 -0
package/backcompat/voices-wrapper.d.mts +32 -0
package/backcompat/voices-wrapper.d.mts.map +1 -0
package/backcompat/voices-wrapper.d.ts +32 -0
package/backcompat/voices-wrapper.d.ts.map +1 -0
package/backcompat/voices-wrapper.js +77 -0
package/backcompat/voices-wrapper.js.map +1 -0
package/backcompat/voices-wrapper.mjs +73 -0
package/backcompat/voices-wrapper.mjs.map +1 -0
package/package.json +1 -1
package/src/backcompat/index.ts +27 -289
package/src/backcompat/tts-wrapper.ts +324 -0
package/src/backcompat/types.ts +19 -0
package/src/backcompat/voice-changer-wrapper.ts +67 -0
package/src/backcompat/voices-wrapper.ts +158 -0
package/src/version.ts +1 -1
package/version.d.mts +1 -1
package/version.d.ts +1 -1
package/version.js +1 -1
package/version.mjs +1 -1

package/src/backcompat/tts-wrapper.ts ADDED Viewed

@@ -0,0 +1,324 @@
+import WebSocket from "ws";
+import { Cartesia } from "../client";
+import { type RequestOptions as InternalRequestOptions } from "../internal/request-options";
+import { BackCompatRequestOptions } from "./types";
+// Define compatible interfaces to match the old SDK types for WebSocket
+export interface BackCompatWebSocketOptions { // connection-level output settings; WebSocketWrapper.send uses them as output_format when a request has no outputFormat of its own
+	container?: "raw" | "wav" | "mp3"; // sent as output_format.container
+	encoding?: "pcm_f32le" | "pcm_s16le" | "pcm_alaw" | "pcm_mulaw"; // sent as output_format.encoding
+	sampleRate: number; // sent as output_format.sample_rate; the only required field
+}
+export type BackCompatTtsRequestVoiceSpecifier = // union discriminated on `mode`; WebSocketWrapper.send forwards the object unchanged as `voice`
+	| { mode: "id"; id: string } // voice referenced by id
+	| { mode: "embedding"; embedding: number[] }; // voice supplied as a numeric embedding
+export interface BackCompatGenerationConfig { // forwarded unchanged as `generation_config` by WebSocketWrapper.send
+	volume?: number;
+	speed?: number;
+	emotion?: string[]; // Simplified from strict union for backcompat flexibility
+}
+export interface BackCompatWebSocketTtsRequest { // camelCase request accepted by WebSocketWrapper.send, which renames fields to snake_case for the wire payload
+	modelId: string; // sent as model_id
+	transcript: string;
+	voice: BackCompatTtsRequestVoiceSpecifier; // sent unchanged
+	generationConfig?: BackCompatGenerationConfig; // sent as generation_config when set
+	outputFormat?: { // when omitted, send() falls back to the wrapper's connection-level config
+		container?: "raw" | "wav" | "mp3";
+		encoding?: "pcm_f32le" | "pcm_s16le" | "pcm_alaw" | "pcm_mulaw";
+		sampleRate?: number; // sent as sample_rate
+		bitRate?: number; // sent as bit_rate
+	};
+	contextId?: string; // Sent as context_id and used to route responses to this request's source; send() generates one when it is omitted
+	// Add other fields as needed
+	continue?: boolean; // this and the three fields below are forwarded only when not undefined
+	duration?: number;
+	addTimestamps?: boolean; // sent as add_timestamps
+	addPhonemeTimestamps?: boolean; // sent as add_phoneme_timestamps
+}
+/**
+ * Builds a random identifier laid out like a version-4 UUID
+ * (`xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx`).
+ *
+ * Each `x` becomes a random hex digit and the `y` becomes one of `8`, `9`,
+ * `a` or `b`. Randomness comes from `Math.random()`, so the result is NOT
+ * cryptographically secure; it is only used to tell contexts apart.
+ *
+ * @returns A 36-character lowercase identifier.
+ */
+function uuidv4(): string {
+	return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, (placeholder) => {
+		const nibble = (Math.random() * 16) | 0;
+		// `y` positions carry the variant bits: force the top two bits to `10`.
+		const digit = placeholder === 'x' ? nibble : (nibble & 0x3) | 0x8;
+		return digit.toString(16);
+	});
+}
+/**
+ * Async FIFO of raw audio bytes that consumers drain as 32-bit float samples.
+ *
+ * Producers call `push()` for every received chunk and `markDone()` once no
+ * more data will arrive. Consumers call `read()` repeatedly until it returns 0.
+ *
+ * Bytes are reinterpreted with `Float32Array`, i.e. in the platform's native
+ * byte order (assumes the stream is pcm_f32le on a little-endian host —
+ * TODO confirm for other encodings).
+ */
+class AudioSource {
+	/** Queued chunks; each one holds a non-zero, whole number of 4-byte samples. */
+	private buffers: Buffer[] = [];
+	/** Trailing bytes (fewer than 4) of a sample that was split across two pushes. */
+	private remainder: Buffer | null = null;
+	/** Resolvers of `read()` calls parked until data or end-of-stream arrives. */
+	private waiters: Array<() => void> = [];
+	public isDone = false;
+	/**
+	 * Queues a chunk of audio bytes and wakes any parked readers.
+	 *
+	 * Only whole samples are queued. A chunk that ends mid-sample keeps its
+	 * tail aside until the next push completes it, so `read()` never reports
+	 * a fractional sample count, and an empty chunk can never be mistaken
+	 * for end-of-stream.
+	 */
+	push(data: Buffer) {
+		const combined = this.remainder ? Buffer.concat([this.remainder, data]) : data;
+		const alignedLength = combined.length - (combined.length % 4);
+		this.remainder = alignedLength < combined.length ? combined.subarray(alignedLength) : null;
+		if (alignedLength === 0) {
+			return;
+		}
+		this.buffers.push(combined.subarray(0, alignedLength));
+		this.wake();
+	}
+	/** Marks the stream as finished, discards any incomplete sample and wakes parked readers. */
+	markDone() {
+		this.isDone = true;
+		this.remainder = null;
+		this.wake();
+	}
+	/** Resumes every parked `read()` call; each one re-checks the queue itself. */
+	private wake() {
+		const parked = this.waiters;
+		this.waiters = [];
+		for (const resume of parked) {
+			resume();
+		}
+	}
+	/**
+	 * Copies queued samples into `outBuffer`, waiting if nothing is available yet.
+	 *
+	 * @param outBuffer Destination; at most `outBuffer.length` samples are written.
+	 * @returns The number of samples written. 0 means the stream is finished
+	 *          and fully drained (or `outBuffer` has no room).
+	 */
+	async read(outBuffer: Float32Array): Promise<number> {
+		// Loop rather than test once: several readers may be woken by a single push.
+		while (this.buffers.length === 0 && !this.isDone) {
+			await new Promise<void>((resolve) => { this.waiters.push(resolve); });
+		}
+		let floatsRead = 0;
+		while (floatsRead < outBuffer.length) {
+			const head = this.buffers[0];
+			if (head === undefined) {
+				break;
+			}
+			const floatsInHead = head.length / 4; // integral: push() only queues whole samples
+			const floatsToCopy = Math.min(floatsInHead, outBuffer.length - floatsRead);
+			const bytesToCopy = floatsToCopy * 4;
+			// A Float32Array view needs a byteOffset that is a multiple of 4.
+			// When the chunk is not aligned, copy just the bytes we need into a
+			// fresh buffer (which starts at offset 0).
+			const samples = head.byteOffset % 4 === 0
+				? new Float32Array(head.buffer, head.byteOffset, floatsToCopy)
+				: new Float32Array(new Uint8Array(head.subarray(0, bytesToCopy)).buffer, 0, floatsToCopy);
+			outBuffer.set(samples, floatsRead);
+			floatsRead += floatsToCopy;
+			if (floatsToCopy < floatsInHead) {
+				// Partially consumed: keep the unread tail at the front of the queue.
+				this.buffers[0] = head.subarray(bytesToCopy);
+			} else {
+				this.buffers.shift();
+			}
+		}
+		return floatsRead;
+	}
+}
+/**
+ * Backwards-compatible WebSocket text-to-speech session.
+ *
+ * Usage: `connect()`, then `send()` one or more requests and read audio from
+ * the returned `source`, then `disconnect()`. Incoming messages are routed to
+ * the matching request by their `context_id`.
+ */
+export class WebSocketWrapper {
+	private client: Cartesia;
+	private config: BackCompatWebSocketOptions;
+	private socket: WebSocket | null = null;
+	/** In-flight requests keyed by context id; an entry is dropped once its stream finishes. */
+	private sources: Map<string, AudioSource> = new Map();
+	// Fallback source for messages that carry no context_id, or one we do not know.
+	// It is the source of the oldest request sent while no fallback was set, and is
+	// cleared again when that stream finishes.
+	private defaultSource: AudioSource | null = null;
+	constructor(client: Cartesia, config: BackCompatWebSocketOptions) {
+		this.client = client;
+		this.config = config;
+	}
+	/**
+	 * Opens the socket at `<baseURL>/tts/websocket` (http→ws, https→wss).
+	 *
+	 * @returns A promise that resolves once the socket is open and rejects if
+	 *          the socket errors or closes before it opens.
+	 */
+	async connect() {
+		// baseURL is like https://api.cartesia.ai
+		let urlStr = this.client.baseURL.replace(/^http/, "ws");
+		if (!urlStr.includes("/tts/websocket")) {
+			urlStr += urlStr.endsWith("/") ? "tts/websocket" : "/tts/websocket";
+		}
+		const url = new URL(urlStr);
+		const headers: Record<string, string> = {
+			"cartesia-version": "2025-04-16",
+		};
+		if (this.client.apiKey) {
+			headers["Authorization"] = `Bearer ${this.client.apiKey}`;
+		}
+		const socket = new WebSocket(url.toString(), { headers });
+		this.socket = socket;
+		return new Promise<void>((resolve, reject) => {
+			socket.on("open", () => {
+				console.log("WebSocket connected.");
+				resolve();
+			});
+			socket.on("error", (err) => {
+				console.error("WebSocket error:", err);
+				reject(err);
+			});
+			socket.on("message", (data) => {
+				this.handleMessage(data);
+			});
+			socket.on("close", () => {
+				console.log("WebSocket closed.");
+				// Nothing more can arrive: end every open stream and forget it.
+				this.sources.forEach((s) => { s.markDone(); });
+				this.sources.clear();
+				if (this.defaultSource) {
+					this.defaultSource.markDone();
+					this.defaultSource = null;
+				}
+				// No-op once the promise has settled; otherwise connect() would hang forever.
+				reject(new Error("WebSocket closed before the connection was established"));
+			});
+		});
+	}
+	/** Ends `source` and removes every reference to it so finished streams do not pile up. */
+	private finishSource(source: AudioSource) {
+		source.markDone();
+		this.sources.forEach((candidate, id) => {
+			if (candidate === source) this.sources.delete(id);
+		});
+		if (this.defaultSource === source) {
+			this.defaultSource = null;
+		}
+	}
+	/**
+	 * Parses one server message and routes it to the matching source:
+	 * `chunk` pushes base64-decoded audio, `done` and `error` end the stream.
+	 * Messages that cannot be parsed are logged and ignored.
+	 */
+	private handleMessage(data: WebSocket.Data) {
+		try {
+			const str = data.toString();
+			const msg = JSON.parse(str);
+			const contextId = msg.context_id;
+			let source = contextId ? this.sources.get(contextId) : this.defaultSource;
+			// If we received a message for a context we don't know about, and we have a default source, use it
+			if (!source && this.defaultSource) {
+				source = this.defaultSource;
+			}
+			if (msg.type === "chunk" && msg.data) {
+				const audioData = Buffer.from(msg.data, "base64");
+				if (source) source.push(audioData);
+			} else if (msg.type === "done") {
+				if (source) this.finishSource(source);
+			} else if (msg.type === "error") {
+				console.error("Server error:", msg);
+				// The error itself is not surfaced to the reader; the stream simply ends.
+				if (source) this.finishSource(source);
+			}
+		} catch (e) {
+			console.error("Error parsing message:", e);
+		}
+	}
+	/**
+	 * Sends a TTS request over the open socket.
+	 *
+	 * @param request Legacy camelCase request; fields are renamed to snake_case.
+	 * @returns `{ source }`, the stream that receives this context's audio.
+	 *          Sending again with the `contextId` of a request that is still in
+	 *          flight (a continuation) returns that same source.
+	 * @throws Error("WebSocket not connected") when the socket is missing or not open.
+	 */
+	async send(request: BackCompatWebSocketTtsRequest) {
+		const socket = this.socket;
+		// Refuse to send unless the socket is open, so we never register a source
+		// that no message could ever complete.
+		if (!socket || socket.readyState !== WebSocket.OPEN) {
+			throw new Error("WebSocket not connected");
+		}
+		// Ensure request has a context_id so we can route the response
+		const contextId = request.contextId || uuidv4();
+		// Reuse the stream of a context that is still in flight; replacing it would
+		// leave the earlier reader waiting on a source that never receives data.
+		const existing = this.sources.get(contextId);
+		const source = existing !== undefined ? existing : new AudioSource();
+		this.sources.set(contextId, source);
+		// Also set as default source if none exists, for compatibility with simple tests
+		if (!this.defaultSource) {
+			this.defaultSource = source;
+		}
+		const payload: any = {
+			model_id: request.modelId,
+			transcript: request.transcript,
+			voice: request.voice,
+			context_id: contextId,
+		};
+		// The per-request output format wins; otherwise use the connection-level config.
+		if (request.outputFormat) {
+			payload.output_format = {
+				container: request.outputFormat.container,
+				encoding: request.outputFormat.encoding,
+				sample_rate: request.outputFormat.sampleRate,
+				bit_rate: request.outputFormat.bitRate,
+			};
+		} else if (this.config) {
+			payload.output_format = {
+				container: this.config.container,
+				encoding: this.config.encoding,
+				sample_rate: this.config.sampleRate,
+			};
+		}
+		if (request.generationConfig) {
+			payload.generation_config = request.generationConfig;
+		}
+		// Optional flags are only included when the caller set them.
+		if (request.continue !== undefined) payload.continue = request.continue;
+		if (request.duration !== undefined) payload.duration = request.duration;
+		if (request.addTimestamps !== undefined) payload.add_timestamps = request.addTimestamps;
+		if (request.addPhonemeTimestamps !== undefined) payload.add_phoneme_timestamps = request.addPhonemeTimestamps;
+		socket.send(JSON.stringify(payload));
+		return {
+			source: source
+		};
+	}
+	/** Closes the socket if one was created; open streams end when the close event fires. */
+	disconnect() {
+		if (this.socket) {
+			this.socket.close();
+		}
+	}
+}
+/**
+ * Backwards-compatible facade over the client's TTS resource: accepts the
+ * legacy camelCase request and option shapes and translates them before
+ * delegating to the client.
+ */
+export class TTSWrapper {
+	private client: Cartesia;
+	constructor(client: Cartesia) {
+		this.client = client;
+	}
+	/** Creates an unconnected WebSocket session that uses `config` as its default output format. */
+	websocket(config: BackCompatWebSocketOptions) {
+		return new WebSocketWrapper(this.client, config);
+	}
+	/**
+	 * Synthesizes speech through `client.tts.synthesizeBytes`.
+	 *
+	 * @param request Legacy request; `modelId`, `generationConfig`, `contextId`
+	 *        and the `sampleRate`/`bitRate` keys of `outputFormat` are renamed
+	 *        to snake_case.
+	 * @param requestOptions Legacy options; `timeoutInSeconds` is converted to
+	 *        milliseconds (a value of 0 is treated as "not set").
+	 * @returns Whatever `synthesizeBytes` returns.
+	 */
+	async bytes(request: any, requestOptions?: BackCompatRequestOptions) {
+		const params: any = {
+			model_id: request.modelId,
+			transcript: request.transcript,
+			voice: request.voice,
+			generation_config: request.generationConfig,
+			context_id: request.contextId,
+		};
+		if (request.outputFormat) {
+			// Peel off the camelCase keys and re-add them under their snake_case names.
+			const { sampleRate, bitRate, ...passthrough } = request.outputFormat;
+			params.output_format = {
+				...passthrough,
+				sample_rate: sampleRate,
+				bit_rate: bitRate,
+			};
+		}
+		const options: any = {};
+		if (requestOptions) {
+			const { timeoutInSeconds, maxRetries, headers, abortSignal } = requestOptions;
+			if (timeoutInSeconds) {
+				options.timeout = timeoutInSeconds * 1000;
+			}
+			if (maxRetries !== undefined) {
+				options.maxRetries = maxRetries;
+			}
+			options.headers = headers;
+			options.signal = abortSignal;
+		}
+		// @ts-ignore
+		return this.client.tts.synthesizeBytes(params, options);
+	}
+}

package/src/backcompat/types.ts ADDED Viewed

@@ -0,0 +1,19 @@
+export type Supplier<T> = T | Promise<T> | (() => T | Promise<T>); // a value, a promise of it, or a (possibly async) factory that produces it
+export interface CartesiaClientOptions { // legacy client constructor options; NOTE(review): its consumer is not visible in this diff — presumably backcompat/index.ts, confirm
+	environment?: Supplier<"https://api.cartesia.ai" | string>; // the literal only documents the default host; the type is effectively any string
+	/** Specify a custom URL to connect the client to. */
+	baseUrl?: Supplier<string>;
+	apiKey?: Supplier<string | undefined>;
+	/** Override the Cartesia-Version header */
+	cartesiaVersion?: string;
+	fetcher?: any; // untyped custom fetch implementation
+}
+export interface BackCompatRequestOptions { // legacy per-request options; the wrappers translate them into the new client's request options
+	timeoutInSeconds?: number; // multiplied by 1000 and passed as `timeout`; the wrappers skip falsy values, so 0 is ignored
+	maxRetries?: number; // passed as `maxRetries` when not undefined
+	abortSignal?: AbortSignal; // passed as `signal`
+	cartesiaVersion?: string; // NOTE(review): none of the tts/voice-changer/voices wrappers reads this field — confirm whether it should become a header
+	headers?: Record<string, string>; // passed as `headers`
+}

package/src/backcompat/voice-changer-wrapper.ts ADDED Viewed

@@ -0,0 +1,67 @@
+import * as fs from "fs";
+import { Cartesia } from "../client";
+import { type Uploadable } from "../core/uploads";
+import { type RequestOptions as InternalRequestOptions } from "../internal/request-options";
+import { Readable } from "stream";
+import { BackCompatRequestOptions } from "./types";
+export interface BackCompatVoiceChangerBytesRequest { // legacy flat request; VoiceChangerWrapper.bytes maps each field to a bracketed form key
+	voiceId: string; // sent as "voice[id]"
+	outputFormatContainer: "raw" | "wav" | "mp3"; // sent as "output_format[container]"
+	outputFormatSampleRate: 8000 | 16000 | 22050 | 24000 | 44100 | 48000; // sent as "output_format[sample_rate]"
+	outputFormatEncoding?: "pcm_f32le" | "pcm_s16le" | "mulaw" | "alaw"; // sent as "output_format[encoding]" when set; NOTE(review): the TTS types spell these "pcm_mulaw"/"pcm_alaw" — confirm which the API expects
+	outputFormatBitRate?: 32000 | 64000 | 96000 | 128000 | 192000; // sent as "output_format[bit_rate]" when set
+}
+/**
+ * Backwards-compatible facade over the client's voice-changer resource:
+ * accepts the legacy flat request shape and delegates to
+ * `client.voiceChanger.changeVoiceBytes`.
+ */
+export class VoiceChangerWrapper {
+	private client: Cartesia;
+	constructor(client: Cartesia) {
+		this.client = client;
+	}
+	/**
+	 * Converts `clip` to the voice identified by `request.voiceId`.
+	 *
+	 * @param clip Audio to convert.
+	 * @param request Legacy request; each field is sent under a bracketed form key.
+	 * @param requestOptions Legacy options; `timeoutInSeconds` is converted to
+	 *        milliseconds (a value of 0 is treated as "not set").
+	 * @returns A Node `Readable` wrapping the response body when the response
+	 *          has one; otherwise the response itself.
+	 */
+	async bytes(
+		clip: File | fs.ReadStream | Blob,
+		request: BackCompatVoiceChangerBytesRequest,
+		requestOptions?: BackCompatRequestOptions
+	) {
+		const { voiceId, outputFormatContainer, outputFormatSampleRate, outputFormatEncoding, outputFormatBitRate } = request;
+		const params: any = {
+			clip: clip as Uploadable,
+			"voice[id]": voiceId,
+			"output_format[container]": outputFormatContainer,
+			"output_format[sample_rate]": outputFormatSampleRate,
+		};
+		// Optional output settings are only sent when the caller provided them.
+		if (outputFormatEncoding) {
+			params["output_format[encoding]"] = outputFormatEncoding;
+		}
+		if (outputFormatBitRate) {
+			params["output_format[bit_rate]"] = outputFormatBitRate;
+		}
+		const options: InternalRequestOptions = {};
+		if (requestOptions) {
+			const { timeoutInSeconds, maxRetries, headers, abortSignal } = requestOptions;
+			if (timeoutInSeconds) {
+				options.timeout = timeoutInSeconds * 1000;
+			}
+			if (maxRetries !== undefined) {
+				options.maxRetries = maxRetries;
+			}
+			options.headers = headers;
+			options.signal = abortSignal;
+		}
+		const binaryOptions = { ...options, __binaryResponse: true } as any;
+		const response = await this.client.voiceChanger.changeVoiceBytes(params, binaryOptions);
+		// @ts-ignore
+		const webBody = response.body;
+		if (webBody) {
+			// @ts-ignore
+			return Readable.fromWeb(webBody);
+		}
+		return response;
+	}
+}

package/src/backcompat/voices-wrapper.ts ADDED Viewed

@@ -0,0 +1,158 @@
+import * as fs from "fs";
+import { Cartesia } from "../client";
+import { type Uploadable } from "../core/uploads";
+import {
+	type VoiceCloneParams,
+	type VoiceMetadata,
+	type SupportedLanguage,
+	type VoiceUpdateParams,
+	type Voice,
+	type VoiceLocalizeParams,
+} from "../resources/voices";
+import { type RequestOptions as InternalRequestOptions } from "../internal/request-options";
+import { BackCompatRequestOptions } from "./types";
+export interface BackCompatCloneVoiceRequest { // legacy request accepted by VoicesWrapper.clone
+	name: string;
+	description?: string; // forwarded when not undefined
+	language: string; // cast to SupportedLanguage without validation
+	mode: "similarity" | "stability"; // NOTE(review): VoicesWrapper.clone never reads this field
+	enhance?: boolean; // NOTE(review): VoicesWrapper.clone never reads this field
+	baseVoiceId?: string; // sent as base_voice_id when not undefined
+}
+export interface BackCompatUpdateVoiceRequest { // legacy request accepted by VoicesWrapper.update; both fields are forwarded unchanged
+	name: string;
+	description: string;
+}
+export interface BackCompatLocalizeVoiceRequest { // legacy request accepted by VoicesWrapper.localize
+	voiceId: string; // sent as voice_id
+	name: string;
+	description: string;
+	language: // target language code; passed through with an `as any` cast
+		| "en"
+		| "de"
+		| "es"
+		| "fr"
+		| "ja"
+		| "pt"
+		| "zh"
+		| "hi"
+		| "it"
+		| "ko"
+		| "nl"
+		| "pl"
+		| "ru"
+		| "sv"
+		| "tr";
+	originalSpeakerGender: "male" | "female"; // sent as original_speaker_gender
+	dialect?: // forwarded only when truthy, with an `as any` cast
+		| "au"
+		| "in"
+		| "so"
+		| "uk"
+		| "us"
+		| "mx"
+		| "pe"
+		| "br"
+		| "eu"
+		| "ca";
+}
+/**
+ * Backwards-compatible facade over the client's voices resource: accepts the
+ * legacy camelCase request and option shapes and delegates to
+ * `client.voices`.
+ */
+export class VoicesWrapper {
+	private client: Cartesia;
+	constructor(client: Cartesia) {
+		this.client = client;
+	}
+	/**
+	 * Translates legacy request options into the new client's request options.
+	 *
+	 * `timeoutInSeconds` is converted to milliseconds; falsy values (including
+	 * 0) are treated as "not set". `headers` and `signal` are always assigned
+	 * when options are given, even if undefined.
+	 * NOTE(review): `cartesiaVersion` is not forwarded — confirm whether it
+	 * should be sent as a header.
+	 */
+	private toInternalOptions(requestOptions?: BackCompatRequestOptions): InternalRequestOptions {
+		const options: InternalRequestOptions = {};
+		if (requestOptions) {
+			if (requestOptions.timeoutInSeconds) {
+				options.timeout = requestOptions.timeoutInSeconds * 1000;
+			}
+			if (requestOptions.maxRetries !== undefined) {
+				options.maxRetries = requestOptions.maxRetries;
+			}
+			options.headers = requestOptions.headers;
+			options.signal = requestOptions.abortSignal;
+		}
+		return options;
+	}
+	/**
+	 * Clones a voice from an audio clip.
+	 *
+	 * @param clip Audio sample to clone from.
+	 * @param request Legacy request. `language` is cast without validation.
+	 *        NOTE(review): `mode` and `enhance` are accepted but not forwarded —
+	 *        confirm whether the new API has counterparts.
+	 * @param requestOptions Legacy per-request options.
+	 * @returns The result of `client.voices.clone`.
+	 */
+	async clone(
+		clip: File | fs.ReadStream | Blob,
+		request: BackCompatCloneVoiceRequest,
+		requestOptions?: BackCompatRequestOptions
+	): Promise<VoiceMetadata> {
+		const params: VoiceCloneParams = {
+			clip: clip as Uploadable,
+			name: request.name,
+			language: request.language as SupportedLanguage,
+		};
+		if (request.description !== undefined) {
+			params.description = request.description;
+		}
+		if (request.baseVoiceId !== undefined) {
+			params.base_voice_id = request.baseVoiceId;
+		}
+		return this.client.voices.clone(params, this.toInternalOptions(requestOptions));
+	}
+	/**
+	 * Updates the name and description of the voice with the given id.
+	 *
+	 * @returns The result of `client.voices.update`.
+	 */
+	async update(
+		id: string,
+		request: BackCompatUpdateVoiceRequest,
+		requestOptions?: BackCompatRequestOptions
+	): Promise<Voice> {
+		const params: VoiceUpdateParams = {
+			name: request.name,
+			description: request.description,
+		};
+		return this.client.voices.update(id, params, this.toInternalOptions(requestOptions));
+	}
+	/**
+	 * Creates a localized variant of an existing voice.
+	 *
+	 * @param request Legacy request; `voiceId` and `originalSpeakerGender` are
+	 *        renamed to snake_case, `dialect` is sent only when set.
+	 * @returns The result of `client.voices.localize`.
+	 */
+	async localize(
+		request: BackCompatLocalizeVoiceRequest,
+		requestOptions?: BackCompatRequestOptions
+	): Promise<VoiceMetadata> {
+		const params: VoiceLocalizeParams = {
+			voice_id: request.voiceId,
+			name: request.name,
+			description: request.description,
+			language: request.language as any,
+			original_speaker_gender: request.originalSpeakerGender,
+		};
+		if (request.dialect) {
+			params.dialect = request.dialect as any; // Cast dialect as list might vary slightly or be strict
+		}
+		return this.client.voices.localize(params, this.toInternalOptions(requestOptions));
+	}
+}

package/src/version.ts CHANGED Viewed

	@@ -1 +1 @@
1	- export const VERSION = '3.0.0-b5'; // x-release-please-version
1	+ export const VERSION = '3.0.0-b6'; // x-release-please-version

package/version.d.mts CHANGED Viewed

@@ -1,2 +1,2 @@
-export declare const VERSION = "3.0.0-b5";
+export declare const VERSION = "3.0.0-b6";
 //# sourceMappingURL=version.d.mts.map

package/version.d.ts CHANGED Viewed

@@ -1,2 +1,2 @@
-export declare const VERSION = "3.0.0-b5";
+export declare const VERSION = "3.0.0-b6";
 //# sourceMappingURL=version.d.ts.map

package/version.js CHANGED Viewed

@@ -1,5 +1,5 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.VERSION = void 0;
-exports.VERSION = '3.0.0-b5'; // x-release-please-version
+exports.VERSION = '3.0.0-b6'; // x-release-please-version
 //# sourceMappingURL=version.js.map

package/version.mjs CHANGED Viewed

@@ -1,2 +1,2 @@
-export const VERSION = '3.0.0-b5'; // x-release-please-version
+export const VERSION = '3.0.0-b6'; // x-release-please-version
 //# sourceMappingURL=version.mjs.map