modelfusion 0.16.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +43 -16
  2. package/model-function/ModelCallEvent.d.ts +4 -3
  3. package/model-function/SuccessfulModelCall.cjs +1 -0
  4. package/model-function/SuccessfulModelCall.d.ts +1 -1
  5. package/model-function/SuccessfulModelCall.js +1 -0
  6. package/model-function/index.cjs +6 -3
  7. package/model-function/index.d.ts +6 -3
  8. package/model-function/index.js +6 -3
  9. package/model-function/synthesize-speech/SpeechSynthesisEvent.d.ts +22 -0
  10. package/model-function/synthesize-speech/SpeechSynthesisModel.d.ts +11 -0
  11. package/model-function/synthesize-speech/synthesizeSpeech.cjs +49 -0
  12. package/model-function/synthesize-speech/synthesizeSpeech.d.ts +16 -0
  13. package/model-function/synthesize-speech/synthesizeSpeech.js +45 -0
  14. package/model-function/transcribe-speech/TranscriptionEvent.cjs +2 -0
  15. package/model-function/transcribe-speech/TranscriptionEvent.js +1 -0
  16. package/model-function/transcribe-speech/TranscriptionModel.cjs +2 -0
  17. package/model-function/transcribe-speech/TranscriptionModel.js +1 -0
  18. package/model-provider/elevenlabs/ElevenLabsError.cjs +31 -0
  19. package/model-provider/elevenlabs/ElevenLabsError.d.ts +3 -0
  20. package/model-provider/elevenlabs/ElevenLabsError.js +27 -0
  21. package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.cjs +88 -0
  22. package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.d.ts +29 -0
  23. package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.js +84 -0
  24. package/model-provider/elevenlabs/index.cjs +17 -0
  25. package/model-provider/elevenlabs/index.d.ts +1 -0
  26. package/model-provider/elevenlabs/index.js +1 -0
  27. package/model-provider/index.cjs +1 -0
  28. package/model-provider/index.d.ts +1 -0
  29. package/model-provider/index.js +1 -0
  30. package/model-provider/openai/OpenAITranscriptionModel.d.ts +1 -1
  31. package/package.json +1 -1
  32. package/util/api/postToApi.cjs +14 -1
  33. package/util/api/postToApi.d.ts +2 -0
  34. package/util/api/postToApi.js +12 -0
  35. /package/model-function/{transcribe-audio/TranscriptionEvent.cjs → synthesize-speech/SpeechSynthesisEvent.cjs} +0 -0
  36. /package/model-function/{transcribe-audio/TranscriptionEvent.js → synthesize-speech/SpeechSynthesisEvent.js} +0 -0
  37. /package/model-function/{transcribe-audio/TranscriptionModel.cjs → synthesize-speech/SpeechSynthesisModel.cjs} +0 -0
  38. /package/model-function/{transcribe-audio/TranscriptionModel.js → synthesize-speech/SpeechSynthesisModel.js} +0 -0
  39. /package/model-function/{transcribe-audio → transcribe-speech}/TranscriptionEvent.d.ts +0 -0
  40. /package/model-function/{transcribe-audio → transcribe-speech}/TranscriptionModel.d.ts +0 -0
  41. /package/model-function/{transcribe-audio → transcribe-speech}/transcribe.cjs +0 -0
  42. /package/model-function/{transcribe-audio → transcribe-speech}/transcribe.d.ts +0 -0
  43. /package/model-function/{transcribe-audio → transcribe-speech}/transcribe.js +0 -0
package/README.md CHANGED
@@ -20,7 +20,7 @@ ModelFusion is a library for building AI apps, chatbots, and agents. It provides
  - **Type inference and validation**: ModelFusion uses TypeScript and [Zod](https://github.com/colinhacks/zod) to infer types wherever possible and to validate model responses.
  - **Flexibility and control**: AI application development can be complex and unique to each project. With ModelFusion, you have complete control over the prompts and model settings, and you can access the raw responses from the models quickly to build what you need.
  - **No chains and predefined prompts**: Use the concepts provided by JavaScript (variables, functions, etc.) and explicit prompts to build applications you can easily understand and control. Not black magic.
- - **More than LLMs**: ModelFusion supports other models, e.g., text-to-image and voice-to-text, to help you build rich AI applications that go beyond just text.
+ - **Multimodal Support**: Beyond just LLMs, ModelFusion encompasses a diverse array of models including text generation, text-to-speech, speech-to-text, and image generation, allowing you to build multifaceted AI applications with ease.
  - **Integrated support features**: Essential features like logging, retries, throttling, tracing, and error handling are built-in, helping you focus more on building your application.
 
  ## Quick Install
@@ -260,9 +260,9 @@ const { tool, parameters, result, text } = await useToolOrGenerateText(
  );
  ```
 
- ### [Transcribe Audio](https://modelfusion.dev/guide/function/transcribe-audio)
+ ### [Transcribe Speech](https://modelfusion.dev/guide/function/transcribe-speech)
 
- Turn audio (voice) into text.
+ Turn speech (audio) into text.
 
  ```ts
  const transcription = await transcribe(
@@ -274,6 +274,20 @@ const transcription = await transcribe(
  );
  ```
 
+ ### [Synthesize Speech](https://modelfusion.dev/guide/function/synthesize-speech)
+
+ Turn text into speech (audio).
+
+ ```ts
+ // `speech` is a Buffer with MP3 audio data
+ const speech = await synthesizeSpeech(
+   new ElevenLabsSpeechSynthesisModel({
+     voice: "ErXwobaYiN019PkySvjV",
+   }),
+   "Hello, World!"
+ );
+ ```
+
  ### [Generate Image](https://modelfusion.dev/guide/function/generate-image)
 
  Generate a base64-encoded image from a prompt.
@@ -356,7 +370,8 @@ const { chunks } = await retrieveTextChunks(
  - [Generate JSON or text](https://modelfusion.dev/guide/function/generate-json-or-text)
  - [Embed Text](https://modelfusion.dev/guide/function/embed-text)
  - [Tokenize Text](https://modelfusion.dev/guide/function/tokenize-text)
- - [Transcribe Audio](https://modelfusion.dev/guide/function/transcribe-audio)
+ - [Transcribe Speech](https://modelfusion.dev/guide/function/transcribe-speech)
+ - [Synthesize Speech](https://modelfusion.dev/guide/function/synthesize-speech)
  - [Generate images](https://modelfusion.dev/guide/function/generate-image)
  - Summarize text
  - [Tools](https://modelfusion.dev/guide/tools)
@@ -375,18 +390,30 @@ const { chunks } = await retrieveTextChunks(
 
  ### Model Providers
 
- | | [OpenAI](https://modelfusion.dev/integration/model-provider/openai) | [Cohere](https://modelfusion.dev/integration/model-provider/cohere) | [Llama.cpp](https://modelfusion.dev/integration/model-provider/llamacpp) | [Hugging Face](https://modelfusion.dev/integration/model-provider/huggingface) | [Stability AI](https://modelfusion.dev/integration/model-provider/stability) | [Automatic1111](https://modelfusion.dev/integration/model-provider/automatic1111) |
- | --- | --- | --- | --- | --- | --- | --- |
- | Hosting | cloud | cloud | server (local) | cloud | cloud | server (local) |
- | [Generate text](https://modelfusion.dev/guide/function/generate-text) | ✅ | ✅ | ✅ | ✅ | | |
- | [Stream text](https://modelfusion.dev/guide/function/generate-text) | ✅ | ✅ | ✅ | | | |
- | [Generate JSON](https://modelfusion.dev/guide/function/generate-json) | chat models | | | | | |
- | [Generate JSON or Text](https://modelfusion.dev/guide/function/generate-json-or-text) | chat models | | | | | |
- | [Embed text](https://modelfusion.dev/guide/function/embed-text) | ✅ | ✅ | ✅ | | | |
- | [Tokenize text](https://modelfusion.dev/guide/function/tokenize-text) | full | full | basic | | | |
- | [Generate image](https://modelfusion.dev/guide/function/generate-image) | ✅ | | | | ✅ | ✅ |
- | [Transcribe audio](https://modelfusion.dev/guide/function/transcribe-audio) | ✅ | | | | | |
- | [Cost calculation](https://modelfusion.dev/guide/run/cost-calculation) | ✅ | | | | | |
+ #### Text and JSON Generation
+
+ | | [OpenAI](https://modelfusion.dev/integration/model-provider/openai) | [Cohere](https://modelfusion.dev/integration/model-provider/cohere) | [Llama.cpp](https://modelfusion.dev/integration/model-provider/llamacpp) | [Hugging Face](https://modelfusion.dev/integration/model-provider/huggingface) |
+ | --- | --- | --- | --- | --- |
+ | [Generate text](https://modelfusion.dev/guide/function/generate-text) | ✅ | ✅ | ✅ | ✅ |
+ | [Stream text](https://modelfusion.dev/guide/function/generate-text) | ✅ | ✅ | ✅ | |
+ | [Generate JSON](https://modelfusion.dev/guide/function/generate-json) | chat models | | | |
+ | [Generate JSON or Text](https://modelfusion.dev/guide/function/generate-json-or-text) | chat models | | | |
+ | [Embed text](https://modelfusion.dev/guide/function/embed-text) | ✅ | ✅ | ✅ | |
+ | [Tokenize text](https://modelfusion.dev/guide/function/tokenize-text) | full | full | basic | |
+
+ #### Image Generation
+
+ - [OpenAI (Dall·E)](https://modelfusion.dev/integration/model-provider/openai)
+ - [Stability AI](https://modelfusion.dev/integration/model-provider/stability)
+ - [Automatic1111](https://modelfusion.dev/integration/model-provider/automatic1111)
+
+ #### Speech Transcription
+
+ - [OpenAI (Whisper)](https://modelfusion.dev/integration/model-provider/openai)
+
+ #### Speech Synthesis
+
+ - [Eleven Labs](https://modelfusion.dev/integration/model-provider/elevenlabs)
 
  ### Vector Indices
 
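Taken together with the existing transcription example, the README additions support a full audio round trip. A minimal sketch, assuming the `{ type: "mp3", data }` input shape from the (truncated) `transcribe` snippet above and that `OPENAI_API_KEY` / `ELEVENLABS_API_KEY` are set in the environment:

```ts
import fs from "node:fs/promises";
import {
  ElevenLabsSpeechSynthesisModel,
  OpenAITranscriptionModel,
  synthesizeSpeech,
  transcribe,
} from "modelfusion";

// Synthesize speech, save it, and transcribe it back.
const speech = await synthesizeSpeech(
  new ElevenLabsSpeechSynthesisModel({ voice: "ErXwobaYiN019PkySvjV" }),
  "Hello, World!"
);
await fs.writeFile("hello.mp3", speech); // `speech` is an MP3 Buffer

const transcription = await transcribe(
  new OpenAITranscriptionModel({ model: "whisper-1" }),
  { type: "mp3", data: speech }
);
console.log(transcription);
```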
package/model-function/ModelCallEvent.d.ts CHANGED
@@ -5,12 +5,13 @@ import { ImageGenerationFinishedEvent, ImageGenerationStartedEvent } from "./gen
  import { JsonGenerationFinishedEvent, JsonGenerationStartedEvent } from "./generate-json/JsonGenerationEvent.js";
  import { TextGenerationFinishedEvent, TextGenerationStartedEvent } from "./generate-text/TextGenerationEvent.js";
  import { TextStreamingFinishedEvent, TextStreamingStartedEvent } from "./generate-text/TextStreamingEvent.js";
- import { TranscriptionFinishedEvent, TranscriptionStartedEvent } from "./transcribe-audio/TranscriptionEvent.js";
+ import { SpeechSynthesisFinishedEvent, SpeechSynthesisStartedEvent } from "./synthesize-speech/SpeechSynthesisEvent.js";
+ import { TranscriptionFinishedEvent, TranscriptionStartedEvent } from "./transcribe-speech/TranscriptionEvent.js";
  export type ModelCallStartedEventMetadata = RunFunctionStartedEventMetadata & {
      model: ModelInformation;
  };
- export type ModelCallStartedEvent = ImageGenerationStartedEvent | JsonGenerationStartedEvent | TextEmbeddingStartedEvent | TextGenerationStartedEvent | TextStreamingStartedEvent | TranscriptionStartedEvent;
+ export type ModelCallStartedEvent = ImageGenerationStartedEvent | JsonGenerationStartedEvent | SpeechSynthesisStartedEvent | TextEmbeddingStartedEvent | TextGenerationStartedEvent | TextStreamingStartedEvent | TranscriptionStartedEvent;
  export type ModelCallFinishedEventMetadata = RunFunctionFinishedEventMetadata & {
      model: ModelInformation;
  };
- export type ModelCallFinishedEvent = ImageGenerationFinishedEvent | JsonGenerationFinishedEvent | TextEmbeddingFinishedEvent | TextGenerationFinishedEvent | TextStreamingFinishedEvent | TranscriptionFinishedEvent;
+ export type ModelCallFinishedEvent = ImageGenerationFinishedEvent | JsonGenerationFinishedEvent | SpeechSynthesisFinishedEvent | TextEmbeddingFinishedEvent | TextGenerationFinishedEvent | TextStreamingFinishedEvent | TranscriptionFinishedEvent;
package/model-function/SuccessfulModelCall.cjs CHANGED
@@ -18,6 +18,7 @@ const eventTypeToCostType = {
      "image-generation-finished": "image-generation",
      "json-generation-finished": "json-generation",
      "json-or-text-generation-finished": "json-or-text-generation",
+     "speech-synthesis-finished": "speech-synthesis",
      "text-embedding-finished": "text-embedding",
      "text-generation-finished": "text-generation",
      "text-streaming-finished": "text-streaming",
package/model-function/SuccessfulModelCall.d.ts CHANGED
@@ -1,7 +1,7 @@
  import { RunFunctionEvent } from "../run/RunFunctionEvent.js";
  import { ModelInformation } from "./ModelInformation.js";
  export type SuccessfulModelCall = {
-     type: "image-generation" | "json-generation" | "json-or-text-generation" | "text-embedding" | "text-generation" | "text-streaming" | "transcription";
+     type: "image-generation" | "json-generation" | "json-or-text-generation" | "speech-synthesis" | "text-embedding" | "text-generation" | "text-streaming" | "transcription";
      model: ModelInformation;
      settings: unknown;
      response: unknown;
package/model-function/SuccessfulModelCall.js CHANGED
@@ -14,6 +14,7 @@ const eventTypeToCostType = {
      "image-generation-finished": "image-generation",
      "json-generation-finished": "json-generation",
      "json-or-text-generation-finished": "json-or-text-generation",
+     "speech-synthesis-finished": "speech-synthesis",
      "text-embedding-finished": "text-embedding",
      "text-generation-finished": "text-generation",
      "text-streaming-finished": "text-streaming",
package/model-function/index.cjs CHANGED
@@ -41,8 +41,11 @@ __exportStar(require("./generate-text/TextGenerationEvent.cjs"), exports);
  __exportStar(require("./generate-text/TextGenerationModel.cjs"), exports);
  __exportStar(require("./generate-text/generateText.cjs"), exports);
  __exportStar(require("./generate-text/streamText.cjs"), exports);
+ __exportStar(require("./synthesize-speech/SpeechSynthesisEvent.cjs"), exports);
+ __exportStar(require("./synthesize-speech/SpeechSynthesisModel.cjs"), exports);
+ __exportStar(require("./synthesize-speech/synthesizeSpeech.cjs"), exports);
  __exportStar(require("./tokenize-text/Tokenizer.cjs"), exports);
  __exportStar(require("./tokenize-text/countTokens.cjs"), exports);
- __exportStar(require("./transcribe-audio/TranscriptionEvent.cjs"), exports);
- __exportStar(require("./transcribe-audio/TranscriptionModel.cjs"), exports);
- __exportStar(require("./transcribe-audio/transcribe.cjs"), exports);
+ __exportStar(require("./transcribe-speech/TranscriptionEvent.cjs"), exports);
+ __exportStar(require("./transcribe-speech/TranscriptionModel.cjs"), exports);
+ __exportStar(require("./transcribe-speech/transcribe.cjs"), exports);
package/model-function/index.d.ts CHANGED
@@ -25,8 +25,11 @@ export * from "./generate-text/TextGenerationEvent.js";
  export * from "./generate-text/TextGenerationModel.js";
  export * from "./generate-text/generateText.js";
  export * from "./generate-text/streamText.js";
+ export * from "./synthesize-speech/SpeechSynthesisEvent.js";
+ export * from "./synthesize-speech/SpeechSynthesisModel.js";
+ export * from "./synthesize-speech/synthesizeSpeech.js";
  export * from "./tokenize-text/Tokenizer.js";
  export * from "./tokenize-text/countTokens.js";
- export * from "./transcribe-audio/TranscriptionEvent.js";
- export * from "./transcribe-audio/TranscriptionModel.js";
- export * from "./transcribe-audio/transcribe.js";
+ export * from "./transcribe-speech/TranscriptionEvent.js";
+ export * from "./transcribe-speech/TranscriptionModel.js";
+ export * from "./transcribe-speech/transcribe.js";
package/model-function/index.js CHANGED
@@ -25,8 +25,11 @@ export * from "./generate-text/TextGenerationEvent.js";
  export * from "./generate-text/TextGenerationModel.js";
  export * from "./generate-text/generateText.js";
  export * from "./generate-text/streamText.js";
+ export * from "./synthesize-speech/SpeechSynthesisEvent.js";
+ export * from "./synthesize-speech/SpeechSynthesisModel.js";
+ export * from "./synthesize-speech/synthesizeSpeech.js";
  export * from "./tokenize-text/Tokenizer.js";
  export * from "./tokenize-text/countTokens.js";
- export * from "./transcribe-audio/TranscriptionEvent.js";
- export * from "./transcribe-audio/TranscriptionModel.js";
- export * from "./transcribe-audio/transcribe.js";
+ export * from "./transcribe-speech/TranscriptionEvent.js";
+ export * from "./transcribe-speech/TranscriptionModel.js";
+ export * from "./transcribe-speech/transcribe.js";
package/model-function/synthesize-speech/SpeechSynthesisEvent.d.ts ADDED
@@ -0,0 +1,22 @@
+ /// <reference types="node" resolution-mode="require"/>
+ import { ModelCallFinishedEventMetadata, ModelCallStartedEventMetadata } from "../ModelCallEvent.js";
+ export type SpeechSynthesisStartedEvent = {
+     type: "speech-synthesis-started";
+     metadata: ModelCallStartedEventMetadata;
+     settings: unknown;
+     text: string;
+ };
+ export type SpeechSynthesisFinishedEvent = {
+     type: "speech-synthesis-finished";
+     metadata: ModelCallFinishedEventMetadata;
+     settings: unknown;
+     text: string;
+ } & ({
+     status: "success";
+     response: Buffer;
+ } | {
+     status: "failure";
+     error: unknown;
+ } | {
+     status: "abort";
+ });
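`SpeechSynthesisFinishedEvent` is a discriminated union on `status`, so observer code can narrow it with a `switch`. A minimal sketch (the observer wiring itself is outside this diff):

```ts
import { SpeechSynthesisFinishedEvent } from "modelfusion";

function logSpeechSynthesisResult(event: SpeechSynthesisFinishedEvent) {
  switch (event.status) {
    case "success":
      // `response` (the audio Buffer) is only available on the "success" branch.
      console.log(`synthesized ${event.response.length} bytes for "${event.text}"`);
      break;
    case "failure":
      console.error(`synthesis failed for "${event.text}":`, event.error);
      break;
    case "abort":
      console.warn(`synthesis aborted for "${event.text}"`);
      break;
  }
}
```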
package/model-function/synthesize-speech/SpeechSynthesisModel.d.ts ADDED
@@ -0,0 +1,11 @@
+ /// <reference types="node" resolution-mode="require"/>
+ import { FunctionOptions } from "../FunctionOptions.js";
+ import { Model, ModelSettings } from "../Model.js";
+ export interface SpeechSynthesisModelSettings extends ModelSettings {
+ }
+ export interface SpeechSynthesisModel<SETTINGS> extends Model<SETTINGS> {
+     /**
+      * Generates an mp3 audio buffer that contains the speech for the given text.
+      */
+     generateSpeechResponse: (text: string, options?: FunctionOptions<SETTINGS>) => PromiseLike<Buffer>;
+ }
package/model-function/synthesize-speech/synthesizeSpeech.cjs ADDED
@@ -0,0 +1,49 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.synthesizeSpeech = void 0;
+ const executeCall_js_1 = require("../executeCall.cjs");
+ async function synthesizeSpeech(model, text, options) {
+     const result = await (0, executeCall_js_1.executeCall)({
+         model,
+         options,
+         generateResponse: (options) => model.generateSpeechResponse(text, options),
+         extractOutputValue: (buffer) => buffer,
+         getStartEvent: (metadata, settings) => ({
+             type: "speech-synthesis-started",
+             metadata,
+             settings,
+             text,
+         }),
+         getAbortEvent: (metadata, settings) => ({
+             type: "speech-synthesis-finished",
+             status: "abort",
+             settings,
+             metadata,
+             text,
+         }),
+         getFailureEvent: (metadata, settings, error) => ({
+             type: "speech-synthesis-finished",
+             status: "failure",
+             metadata,
+             settings,
+             text,
+             error,
+         }),
+         getSuccessEvent: (metadata, settings, response, output) => ({
+             type: "speech-synthesis-finished",
+             status: "success",
+             metadata,
+             settings,
+             text,
+             response,
+             speech: output,
+         }),
+     });
+     return options?.fullResponse === true
+         ? {
+             speech: result.output,
+             metadata: result.metadata,
+         }
+         : result.output;
+ }
+ exports.synthesizeSpeech = synthesizeSpeech;
package/model-function/synthesize-speech/synthesizeSpeech.d.ts ADDED
@@ -0,0 +1,16 @@
+ /// <reference types="node" resolution-mode="require"/>
+ import { FunctionOptions } from "../FunctionOptions.js";
+ import { CallMetadata } from "../executeCall.js";
+ import { SpeechSynthesisModel, SpeechSynthesisModelSettings } from "./SpeechSynthesisModel.js";
+ /**
+  * Synthesizes speech from text.
+  */
+ export declare function synthesizeSpeech<SETTINGS extends SpeechSynthesisModelSettings>(model: SpeechSynthesisModel<SETTINGS>, text: string, options: FunctionOptions<SETTINGS> & {
+     fullResponse: true;
+ }): Promise<{
+     speech: Buffer;
+     metadata: CallMetadata<SpeechSynthesisModel<SETTINGS>>;
+ }>;
+ export declare function synthesizeSpeech<SETTINGS extends SpeechSynthesisModelSettings>(model: SpeechSynthesisModel<SETTINGS>, text: string, options?: FunctionOptions<SETTINGS> & {
+     fullResponse?: false;
+ }): Promise<Buffer>;
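The two overloads tie the return type to the options: by default the promise resolves to the MP3 `Buffer`, while `fullResponse: true` also surfaces the call metadata. A usage sketch based on these declarations:

```ts
import { ElevenLabsSpeechSynthesisModel, synthesizeSpeech } from "modelfusion";

const model = new ElevenLabsSpeechSynthesisModel({ voice: "ErXwobaYiN019PkySvjV" });

// Overload 2: resolves to the MP3 Buffer directly.
const speech = await synthesizeSpeech(model, "Hello, World!");

// Overload 1: resolves to { speech, metadata } for logging or cost tracking.
const { speech: audio, metadata } = await synthesizeSpeech(model, "Hello, World!", {
  fullResponse: true,
});
```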
package/model-function/synthesize-speech/synthesizeSpeech.js ADDED
@@ -0,0 +1,45 @@
+ import { executeCall } from "../executeCall.js";
+ export async function synthesizeSpeech(model, text, options) {
+     const result = await executeCall({
+         model,
+         options,
+         generateResponse: (options) => model.generateSpeechResponse(text, options),
+         extractOutputValue: (buffer) => buffer,
+         getStartEvent: (metadata, settings) => ({
+             type: "speech-synthesis-started",
+             metadata,
+             settings,
+             text,
+         }),
+         getAbortEvent: (metadata, settings) => ({
+             type: "speech-synthesis-finished",
+             status: "abort",
+             settings,
+             metadata,
+             text,
+         }),
+         getFailureEvent: (metadata, settings, error) => ({
+             type: "speech-synthesis-finished",
+             status: "failure",
+             metadata,
+             settings,
+             text,
+             error,
+         }),
+         getSuccessEvent: (metadata, settings, response, output) => ({
+             type: "speech-synthesis-finished",
+             status: "success",
+             metadata,
+             settings,
+             text,
+             response,
+             speech: output,
+         }),
+     });
+     return options?.fullResponse === true
+         ? {
+             speech: result.output,
+             metadata: result.metadata,
+         }
+         : result.output;
+ }
package/model-function/transcribe-speech/TranscriptionEvent.cjs ADDED
@@ -0,0 +1,2 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
package/model-function/transcribe-speech/TranscriptionModel.cjs ADDED
@@ -0,0 +1,2 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
package/model-provider/elevenlabs/ElevenLabsError.cjs ADDED
@@ -0,0 +1,31 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.failedElevenLabsCallResponseHandler = void 0;
+ const ApiCallError_js_1 = require("../../util/api/ApiCallError.cjs");
+ const failedElevenLabsCallResponseHandler = async ({ response, url, requestBodyValues }) => {
+     const responseBody = await response.text();
+     try {
+         // TODO implement ElevenLabsError
+         return new ApiCallError_js_1.ApiCallError({
+             message: responseBody,
+             statusCode: response.status,
+             url,
+             requestBodyValues,
+         });
+     }
+     catch (error) {
+         if (error instanceof Error) {
+             if (error.name === "AbortError" || error instanceof ApiCallError_js_1.ApiCallError) {
+                 throw error;
+             }
+         }
+         throw new ApiCallError_js_1.ApiCallError({
+             message: responseBody,
+             cause: error,
+             statusCode: response.status,
+             url,
+             requestBodyValues,
+         });
+     }
+ };
+ exports.failedElevenLabsCallResponseHandler = failedElevenLabsCallResponseHandler;
package/model-provider/elevenlabs/ElevenLabsError.d.ts ADDED
@@ -0,0 +1,3 @@
+ import { ApiCallError } from "../../util/api/ApiCallError.js";
+ import { ResponseHandler } from "../../util/api/postToApi.js";
+ export declare const failedElevenLabsCallResponseHandler: ResponseHandler<ApiCallError>;
package/model-provider/elevenlabs/ElevenLabsError.js ADDED
@@ -0,0 +1,27 @@
+ import { ApiCallError } from "../../util/api/ApiCallError.js";
+ export const failedElevenLabsCallResponseHandler = async ({ response, url, requestBodyValues }) => {
+     const responseBody = await response.text();
+     try {
+         // TODO implement ElevenLabsError
+         return new ApiCallError({
+             message: responseBody,
+             statusCode: response.status,
+             url,
+             requestBodyValues,
+         });
+     }
+     catch (error) {
+         if (error instanceof Error) {
+             if (error.name === "AbortError" || error instanceof ApiCallError) {
+                 throw error;
+             }
+         }
+         throw new ApiCallError({
+             message: responseBody,
+             cause: error,
+             statusCode: response.status,
+             url,
+             requestBodyValues,
+         });
+     }
+ };
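The `try`/`catch` above is scaffolding for the TODO'd structured error parsing: once the body is parsed into a typed error, parse failures fall through to a generic `ApiCallError` with `cause`. A hedged sketch of the same `ResponseHandler<ApiCallError>` pattern for a hypothetical provider that already returns JSON errors (`myProviderErrorSchema` and the handler name are assumptions, not part of this diff):

```ts
import { z } from "zod";
import { ApiCallError } from "../../util/api/ApiCallError.js";
import { ResponseHandler } from "../../util/api/postToApi.js";

// Hypothetical error payload schema for an imaginary provider.
const myProviderErrorSchema = z.object({
  error: z.object({ message: z.string() }),
});

export const failedMyProviderCallResponseHandler: ResponseHandler<ApiCallError> =
  async ({ response, url, requestBodyValues }) => {
    const responseBody = await response.text();
    try {
      // Map the parsed provider error onto ApiCallError.
      const parsed = myProviderErrorSchema.parse(JSON.parse(responseBody));
      return new ApiCallError({
        message: parsed.error.message,
        statusCode: response.status,
        url,
        requestBodyValues,
      });
    } catch (error) {
      // Parse failures degrade to the raw body, following the ElevenLabs handler.
      return new ApiCallError({
        message: responseBody,
        cause: error,
        statusCode: response.status,
        url,
        requestBodyValues,
      });
    }
  };
```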
package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.cjs ADDED
@@ -0,0 +1,88 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.ElevenLabsSpeechSynthesisModel = void 0;
+ const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
+ const callWithRetryAndThrottle_js_1 = require("../../util/api/callWithRetryAndThrottle.cjs");
+ const postToApi_js_1 = require("../../util/api/postToApi.cjs");
+ const ElevenLabsError_js_1 = require("./ElevenLabsError.cjs");
+ class ElevenLabsSpeechSynthesisModel extends AbstractModel_js_1.AbstractModel {
+     constructor(settings) {
+         super({ settings });
+         Object.defineProperty(this, "provider", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: "elevenlabs"
+         });
+         Object.defineProperty(this, "modelName", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: null
+         });
+     }
+     get apiKey() {
+         const apiKey = this.settings.apiKey ?? process.env.ELEVENLABS_API_KEY;
+         if (apiKey == null) {
+             throw new Error("No ElevenLabs API key provided. Pass it in the constructor or set the ELEVENLABS_API_KEY environment variable.");
+         }
+         return apiKey;
+     }
+     async callAPI(text, options) {
+         const run = options?.run;
+         const settings = options?.settings;
+         const combinedSettings = {
+             ...this.settings,
+             ...settings,
+         };
+         return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
+             retry: this.settings.retry,
+             throttle: this.settings.throttle,
+             call: async () => callElevenLabsTextToSpeechAPI({
+                 baseUrl: combinedSettings.baseUrl,
+                 abortSignal: run?.abortSignal,
+                 apiKey: this.apiKey,
+                 text,
+                 voiceId: combinedSettings.voice,
+                 modelId: combinedSettings.model,
+                 voiceSettings: combinedSettings.voiceSettings,
+             }),
+         });
+     }
+     generateSpeechResponse(text, options) {
+         return this.callAPI(text, options);
+     }
+     withSettings(additionalSettings) {
+         return new ElevenLabsSpeechSynthesisModel({
+             ...this.settings,
+             ...additionalSettings,
+         });
+     }
+ }
+ exports.ElevenLabsSpeechSynthesisModel = ElevenLabsSpeechSynthesisModel;
+ /**
+  * @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
+  */
+ async function callElevenLabsTextToSpeechAPI({ baseUrl = "https://api.elevenlabs.io/v1", abortSignal, apiKey, text, voiceId, modelId, voiceSettings, }) {
+     return (0, postToApi_js_1.postJsonToApi)({
+         url: `${baseUrl}/text-to-speech/${voiceId}`,
+         headers: {
+             "xi-api-key": apiKey,
+         },
+         body: {
+             text,
+             model_id: modelId,
+             voice_settings: voiceSettings != null
+                 ? {
+                     stability: voiceSettings.stability,
+                     similarity_boost: voiceSettings.similarityBoost,
+                     style: voiceSettings.style,
+                     use_speaker_boost: voiceSettings.useSpeakerBoost,
+                 }
+                 : undefined,
+         },
+         failedResponseHandler: ElevenLabsError_js_1.failedElevenLabsCallResponseHandler,
+         successfulResponseHandler: (0, postToApi_js_1.createAudioMpegResponseHandler)(),
+         abortSignal,
+     });
+ }
package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.d.ts ADDED
@@ -0,0 +1,29 @@
+ /// <reference types="node" resolution-mode="require"/>
+ import { AbstractModel } from "../../model-function/AbstractModel.js";
+ import { FunctionOptions } from "../../model-function/FunctionOptions.js";
+ import { SpeechSynthesisModel, SpeechSynthesisModelSettings } from "../../model-function/synthesize-speech/SpeechSynthesisModel.js";
+ import { RetryFunction } from "../../util/api/RetryFunction.js";
+ import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
+ export interface ElevenLabsSpeechSynthesisModelSettings extends SpeechSynthesisModelSettings {
+     voice: string;
+     baseUrl?: string;
+     apiKey?: string;
+     model?: string;
+     voiceSettings?: {
+         stability: number;
+         similarityBoost: number;
+         style?: number;
+         useSpeakerBoost?: boolean;
+     };
+     retry?: RetryFunction;
+     throttle?: ThrottleFunction;
+ }
+ export declare class ElevenLabsSpeechSynthesisModel extends AbstractModel<ElevenLabsSpeechSynthesisModelSettings> implements SpeechSynthesisModel<ElevenLabsSpeechSynthesisModelSettings> {
+     constructor(settings: ElevenLabsSpeechSynthesisModelSettings);
+     readonly provider = "elevenlabs";
+     readonly modelName: null;
+     private get apiKey();
+     private callAPI;
+     generateSpeechResponse(text: string, options?: FunctionOptions<ElevenLabsSpeechSynthesisModelSettings> | undefined): Promise<Buffer>;
+     withSettings(additionalSettings: Partial<ElevenLabsSpeechSynthesisModelSettings>): this;
+ }
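A usage sketch for the settings above; the voice ID is the one from the README example, and the camelCase `voiceSettings` are mapped to the API's snake_case fields by `callElevenLabsTextToSpeechAPI`:

```ts
import fs from "node:fs/promises";
import { ElevenLabsSpeechSynthesisModel, synthesizeSpeech } from "modelfusion";

// Relies on the ELEVENLABS_API_KEY environment variable (see the apiKey getter).
const model = new ElevenLabsSpeechSynthesisModel({
  voice: "ErXwobaYiN019PkySvjV",
  voiceSettings: {
    stability: 0.5, // sent as `stability`
    similarityBoost: 0.75, // sent as `similarity_boost`
  },
  // retry and throttle accept the library's RetryFunction / ThrottleFunction.
});

const speech = await synthesizeSpeech(model, "Hello, World!");
await fs.writeFile("hello.mp3", speech); // audio/mpeg Buffer
```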
package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.js ADDED
@@ -0,0 +1,84 @@
+ import { AbstractModel } from "../../model-function/AbstractModel.js";
+ import { callWithRetryAndThrottle } from "../../util/api/callWithRetryAndThrottle.js";
+ import { createAudioMpegResponseHandler, postJsonToApi, } from "../../util/api/postToApi.js";
+ import { failedElevenLabsCallResponseHandler } from "./ElevenLabsError.js";
+ export class ElevenLabsSpeechSynthesisModel extends AbstractModel {
+     constructor(settings) {
+         super({ settings });
+         Object.defineProperty(this, "provider", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: "elevenlabs"
+         });
+         Object.defineProperty(this, "modelName", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: null
+         });
+     }
+     get apiKey() {
+         const apiKey = this.settings.apiKey ?? process.env.ELEVENLABS_API_KEY;
+         if (apiKey == null) {
+             throw new Error("No ElevenLabs API key provided. Pass it in the constructor or set the ELEVENLABS_API_KEY environment variable.");
+         }
+         return apiKey;
+     }
+     async callAPI(text, options) {
+         const run = options?.run;
+         const settings = options?.settings;
+         const combinedSettings = {
+             ...this.settings,
+             ...settings,
+         };
+         return callWithRetryAndThrottle({
+             retry: this.settings.retry,
+             throttle: this.settings.throttle,
+             call: async () => callElevenLabsTextToSpeechAPI({
+                 baseUrl: combinedSettings.baseUrl,
+                 abortSignal: run?.abortSignal,
+                 apiKey: this.apiKey,
+                 text,
+                 voiceId: combinedSettings.voice,
+                 modelId: combinedSettings.model,
+                 voiceSettings: combinedSettings.voiceSettings,
+             }),
+         });
+     }
+     generateSpeechResponse(text, options) {
+         return this.callAPI(text, options);
+     }
+     withSettings(additionalSettings) {
+         return new ElevenLabsSpeechSynthesisModel({
+             ...this.settings,
+             ...additionalSettings,
+         });
+     }
+ }
+ /**
+  * @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
+  */
+ async function callElevenLabsTextToSpeechAPI({ baseUrl = "https://api.elevenlabs.io/v1", abortSignal, apiKey, text, voiceId, modelId, voiceSettings, }) {
+     return postJsonToApi({
+         url: `${baseUrl}/text-to-speech/${voiceId}`,
+         headers: {
+             "xi-api-key": apiKey,
+         },
+         body: {
+             text,
+             model_id: modelId,
+             voice_settings: voiceSettings != null
+                 ? {
+                     stability: voiceSettings.stability,
+                     similarity_boost: voiceSettings.similarityBoost,
+                     style: voiceSettings.style,
+                     use_speaker_boost: voiceSettings.useSpeakerBoost,
+                 }
+                 : undefined,
+         },
+         failedResponseHandler: failedElevenLabsCallResponseHandler,
+         successfulResponseHandler: createAudioMpegResponseHandler(),
+         abortSignal,
+     });
+ }
package/model-provider/elevenlabs/index.cjs ADDED
@@ -0,0 +1,17 @@
+ "use strict";
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+     if (k2 === undefined) k2 = k;
+     var desc = Object.getOwnPropertyDescriptor(m, k);
+     if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+         desc = { enumerable: true, get: function() { return m[k]; } };
+     }
+     Object.defineProperty(o, k2, desc);
+ }) : (function(o, m, k, k2) {
+     if (k2 === undefined) k2 = k;
+     o[k2] = m[k];
+ }));
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
+     for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ __exportStar(require("./ElevenLabsSpeechSynthesisModel.cjs"), exports);
package/model-provider/elevenlabs/index.d.ts ADDED
@@ -0,0 +1 @@
+ export * from "./ElevenLabsSpeechSynthesisModel.js";
package/model-provider/elevenlabs/index.js ADDED
@@ -0,0 +1 @@
+ export * from "./ElevenLabsSpeechSynthesisModel.js";
package/model-provider/index.cjs CHANGED
@@ -16,6 +16,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
  Object.defineProperty(exports, "__esModule", { value: true });
  __exportStar(require("./automatic1111/index.cjs"), exports);
  __exportStar(require("./cohere/index.cjs"), exports);
+ __exportStar(require("./elevenlabs/index.cjs"), exports);
  __exportStar(require("./huggingface/index.cjs"), exports);
  __exportStar(require("./llamacpp/index.cjs"), exports);
  __exportStar(require("./openai/index.cjs"), exports);
package/model-provider/index.d.ts CHANGED
@@ -1,5 +1,6 @@
  export * from "./automatic1111/index.js";
  export * from "./cohere/index.js";
+ export * from "./elevenlabs/index.js";
  export * from "./huggingface/index.js";
  export * from "./llamacpp/index.js";
  export * from "./openai/index.js";
package/model-provider/index.js CHANGED
@@ -1,5 +1,6 @@
  export * from "./automatic1111/index.js";
  export * from "./cohere/index.js";
+ export * from "./elevenlabs/index.js";
  export * from "./huggingface/index.js";
  export * from "./llamacpp/index.js";
  export * from "./openai/index.js";
package/model-provider/openai/OpenAITranscriptionModel.d.ts CHANGED
@@ -2,7 +2,7 @@
  import z from "zod";
  import { AbstractModel } from "../../model-function/AbstractModel.js";
  import { FunctionOptions } from "../../model-function/FunctionOptions.js";
- import { TranscriptionModel, TranscriptionModelSettings } from "../../model-function/transcribe-audio/TranscriptionModel.js";
+ import { TranscriptionModel, TranscriptionModelSettings } from "../../model-function/transcribe-speech/TranscriptionModel.js";
  import { RetryFunction } from "../../util/api/RetryFunction.js";
  import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
  import { ResponseHandler } from "../../util/api/postToApi.js";
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
    "name": "modelfusion",
    "description": "Build AI applications, chatbots, and agents with JavaScript and TypeScript.",
-   "version": "0.16.0",
+   "version": "0.18.0",
    "author": "Lars Grammel",
    "license": "MIT",
    "keywords": [
package/util/api/postToApi.cjs CHANGED
@@ -1,6 +1,6 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.postToApi = exports.postJsonToApi = exports.createTextResponseHandler = exports.createJsonResponseHandler = void 0;
+ exports.postToApi = exports.postJsonToApi = exports.createAudioMpegResponseHandler = exports.createTextResponseHandler = exports.createJsonResponseHandler = void 0;
  const ApiCallError_js_1 = require("./ApiCallError.cjs");
  const createJsonResponseHandler = (responseSchema) => async ({ response, url, requestBodyValues }) => {
      const parsedResult = responseSchema.safeParse(await response.json());
@@ -18,6 +18,19 @@ const createJsonResponseHandler = (responseSchema) => async ({ response, url, re
  exports.createJsonResponseHandler = createJsonResponseHandler;
  const createTextResponseHandler = () => async ({ response }) => response.text();
  exports.createTextResponseHandler = createTextResponseHandler;
+ const createAudioMpegResponseHandler = () => async ({ response, url, requestBodyValues }) => {
+     if (response.headers.get("Content-Type") !== "audio/mpeg") {
+         throw new ApiCallError_js_1.ApiCallError({
+             message: "Invalid Content-Type (must be audio/mpeg)",
+             statusCode: response.status,
+             url,
+             requestBodyValues,
+         });
+     }
+     const arrayBuffer = await response.arrayBuffer();
+     return Buffer.from(arrayBuffer);
+ };
+ exports.createAudioMpegResponseHandler = createAudioMpegResponseHandler;
  const postJsonToApi = async ({ url, headers, body, failedResponseHandler, successfulResponseHandler, abortSignal, }) => (0, exports.postToApi)({
      url,
      headers: {
package/util/api/postToApi.d.ts CHANGED
@@ -1,3 +1,4 @@
+ /// <reference types="node" resolution-mode="require"/>
  import { z } from "zod";
  import { ApiCallError } from "./ApiCallError.js";
  export type ResponseHandler<T> = (options: {
@@ -7,6 +8,7 @@ export type ResponseHandler<T> = (options: {
  }) => PromiseLike<T>;
  export declare const createJsonResponseHandler: <T>(responseSchema: z.ZodType<T, z.ZodTypeDef, T>) => ResponseHandler<T>;
  export declare const createTextResponseHandler: () => ResponseHandler<string>;
+ export declare const createAudioMpegResponseHandler: () => ResponseHandler<Buffer>;
  export declare const postJsonToApi: <T>({ url, headers, body, failedResponseHandler, successfulResponseHandler, abortSignal, }: {
      url: string;
      headers?: Record<string, string> | undefined;
package/util/api/postToApi.js CHANGED
@@ -13,6 +13,18 @@ export const createJsonResponseHandler = (responseSchema) => async ({ response,
      return parsedResult.data;
  };
  export const createTextResponseHandler = () => async ({ response }) => response.text();
+ export const createAudioMpegResponseHandler = () => async ({ response, url, requestBodyValues }) => {
+     if (response.headers.get("Content-Type") !== "audio/mpeg") {
+         throw new ApiCallError({
+             message: "Invalid Content-Type (must be audio/mpeg)",
+             statusCode: response.status,
+             url,
+             requestBodyValues,
+         });
+     }
+     const arrayBuffer = await response.arrayBuffer();
+     return Buffer.from(arrayBuffer);
+ };
  export const postJsonToApi = async ({ url, headers, body, failedResponseHandler, successfulResponseHandler, abortSignal, }) => postToApi({
      url,
      headers: {
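`createAudioMpegResponseHandler` composes with `postJsonToApi` the same way the ElevenLabs model uses it. A sketch for a hypothetical provider endpoint (`https://api.example.com/tts`, its body shape, and both function names are assumptions):

```ts
import { ApiCallError } from "./ApiCallError.js";
import {
  createAudioMpegResponseHandler,
  postJsonToApi,
  ResponseHandler,
} from "./postToApi.js";

// Hypothetical failed-response handler: wraps the raw body in an ApiCallError.
const failedExampleCallResponseHandler: ResponseHandler<ApiCallError> = async ({
  response,
  url,
  requestBodyValues,
}) =>
  new ApiCallError({
    message: await response.text(),
    statusCode: response.status,
    url,
    requestBodyValues,
  });

// POSTs JSON and resolves to a Buffer, rejecting unless Content-Type is audio/mpeg.
async function fetchExampleSpeech(text: string, abortSignal?: AbortSignal) {
  return postJsonToApi({
    url: "https://api.example.com/tts",
    body: { text },
    failedResponseHandler: failedExampleCallResponseHandler,
    successfulResponseHandler: createAudioMpegResponseHandler(),
    abortSignal,
  });
}
```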