modelfusion 0.50.0 → 0.51.0

package/README.md CHANGED
@@ -1,6 +1,6 @@
  # ModelFusion
 
- > ### Build multi-modal AI apps, chatbots, and agents with JavaScript and TypeScript.
+ > ### The TypeScript library for building multi-modal AI applications.
 
  [![NPM Version](https://img.shields.io/npm/v/modelfusion?color=33cd56&logo=npm)](https://www.npmjs.com/package/modelfusion)
  [![MIT License](https://img.shields.io/github/license/lgrammel/modelfusion)](https://opensource.org/licenses/MIT)
@@ -10,12 +10,9 @@
 
  [Introduction](#introduction) | [Quick Install](#quick-install) | [Usage](#usage-examples) | [Documentation](#documentation) | [Examples](#more-examples) | [Contributing](#contributing) | [modelfusion.dev](https://modelfusion.dev)
 
- > [!NOTE]
- > ModelFusion is in its initial development phase. Until version 1.0 there may be breaking changes, because I am still exploring the API design. Feedback and suggestions are welcome.
-
  ## Introduction
 
- ModelFusion is a library for building AI applications, chatbots, and agents. Here are the main features:
+ **ModelFusion** is a TypeScript library for building AI applications, chatbots, and agents.
 
  - **Multimodal**: ModelFusion supports a wide range of models including text generation, image generation, text-to-speech, speech-to-text, and embedding models.
  - **Streaming**: ModelFusion supports streaming for many generation models, e.g. text streaming, structure streaming, and full duplex speech streaming.
@@ -26,6 +23,9 @@ ModelFusion is a library for building AI applications, chatbots, and agents. Her
 
  ## Quick Install
 
+ > [!NOTE]
+ > ModelFusion is in its initial development phase. The main API is now mostly stable, but until version 1.0 there may be minor breaking changes. Feedback and suggestions are welcome.
+
  ```sh
  npm install modelfusion
  ```
@@ -118,7 +118,7 @@ const textStream = await streamText(/* ... */);
  const speechStream = await streamSpeech(
    new ElevenLabsSpeechModel({
      voice: "pNInz6obpgDQGcFmaJgB", // Adam
-     model: "eleven_monolingual_v1",
+     optimizeStreamingLatency: 1,
      voiceSettings: { stability: 1, similarityBoost: 0.35 },
      generationConfig: {
        chunkLengthSchedule: [50, 90, 120, 150, 200],
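
For context, the surrounding README example streams generated text directly into ElevenLabs speech synthesis. The sketch below shows roughly how the updated snippet fits together; it assumes the top-level `streamText`/`streamSpeech`/`ElevenLabsSpeechModel` exports the README uses, assumes `streamSpeech` accepts the text stream as its second argument, and leaves the `streamText` arguments elided as the README does:

```ts
import { streamText, streamSpeech, ElevenLabsSpeechModel } from "modelfusion";

const textStream = await streamText(/* ... */); // model + prompt elided, as in the README

const speechStream = await streamSpeech(
  new ElevenLabsSpeechModel({
    voice: "pNInz6obpgDQGcFmaJgB", // Adam
    optimizeStreamingLatency: 1, // new in 0.51.0: 0 (no optimization) to 4 (max)
    voiceSettings: { stability: 1, similarityBoost: 0.35 },
    generationConfig: {
      chunkLengthSchedule: [50, 90, 120, 150, 200],
    },
  }),
  textStream
);

for await (const part of speechStream) {
  // each part is an audio chunk; playback/handling is left to the caller
}
```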
@@ -5,7 +5,6 @@ const ApiCallError_js_1 = require("../../core/api/ApiCallError.cjs");
  const failedElevenLabsCallResponseHandler = async ({ response, url, requestBodyValues }) => {
      const responseBody = await response.text();
      try {
-         // TODO implement ElevenLabsError
          return new ApiCallError_js_1.ApiCallError({
              message: responseBody,
              statusCode: response.status,
@@ -2,7 +2,6 @@ import { ApiCallError } from "../../core/api/ApiCallError.js";
  export const failedElevenLabsCallResponseHandler = async ({ response, url, requestBodyValues }) => {
      const responseBody = await response.text();
      try {
-         // TODO implement ElevenLabsError
          return new ApiCallError({
              message: responseBody,
              statusCode: response.status,
@@ -15,11 +15,14 @@ const elevenLabsModels = [
      "eleven_multilingual_v1",
      "eleven_monolingual_v1",
  ];
- const defaultModel = "eleven_multilingual_v2";
+ const defaultModel = "eleven_monolingual_v1";
  /**
   * Synthesize speech using the ElevenLabs Text to Speech API.
   *
-  * @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
+  * Both regular text-to-speech and full duplex text-to-speech streaming are supported.
+  *
+  * @see https://docs.elevenlabs.io/api-reference/text-to-speech
+  * @see https://docs.elevenlabs.io/api-reference/text-to-speech-websockets
   */
  class ElevenLabsSpeechModel extends AbstractModel_js_1.AbstractModel {
      constructor(settings) {
@@ -84,7 +87,11 @@ class ElevenLabsSpeechModel extends AbstractModel_js_1.AbstractModel {
      ]);
      const queue = new AsyncQueue_js_1.AsyncQueue();
      const model = this.settings.model ?? defaultModel;
- const socket = await (0, SimpleWebSocket_js_1.createSimpleWebSocket)(`wss://api.elevenlabs.io/v1/text-to-speech/${this.settings.voice}/stream-input?model_id=${model}`);
+ const socket = await (0, SimpleWebSocket_js_1.createSimpleWebSocket)(`wss://api.elevenlabs.io/v1/text-to-speech/${this.settings.voice}/stream-input${assembleQuery({
+     model_id: model,
+     optimize_streaming_latency: this.settings.optimizeStreamingLatency,
+     output_format: this.settings.outputFormat,
+ })}`);
  socket.onopen = async () => {
      const api = this.settings.api ?? new ElevenLabsApiConfiguration_js_1.ElevenLabsApiConfiguration();
      // send begin-of-stream (BOS) message:
@@ -158,9 +165,12 @@ class ElevenLabsSpeechModel extends AbstractModel_js_1.AbstractModel {
      }
  }
  exports.ElevenLabsSpeechModel = ElevenLabsSpeechModel;
- async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfiguration_js_1.ElevenLabsApiConfiguration(), abortSignal, text, voiceId, modelId, voiceSettings, }) {
+ async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfiguration_js_1.ElevenLabsApiConfiguration(), abortSignal, text, voiceId, modelId, optimizeStreamingLatency, outputFormat, voiceSettings, }) {
      return (0, postToApi_js_1.postJsonToApi)({
-         url: api.assembleUrl(`/text-to-speech/${voiceId}`),
+         url: api.assembleUrl(`/text-to-speech/${voiceId}${assembleQuery({
+             optimize_streaming_latency: optimizeStreamingLatency,
+             output_format: outputFormat,
+         })}`),
          headers: api.headers,
          body: {
              text,
@@ -172,6 +182,24 @@ async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfigurat
          abortSignal,
      });
  }
+ function assembleQuery(parameters) {
+     let query = "";
+     let hasQuestionMark = false;
+     for (const [key, value] of Object.entries(parameters)) {
+         if (value == null) {
+             continue;
+         }
+         if (!hasQuestionMark) {
+             query += "?";
+             hasQuestionMark = true;
+         }
+         else {
+             query += "&";
+         }
+         query += `${key}=${value}`;
+     }
+     return query;
+ }
  function toApiVoiceSettings(voiceSettings) {
      return voiceSettings != null
          ? {
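
The new `assembleQuery` helper (added identically to the CJS build here and the ESM build further below) produces the optional query string for both the REST URL and the WebSocket URL. A condensed standalone TypeScript sketch of its behavior; the parameter type annotation is added for illustration:

```ts
// Condensed restatement of the helper above (behavior-identical).
function assembleQuery(
  parameters: Record<string, string | number | undefined>
): string {
  let query = "";
  let hasQuestionMark = false;
  for (const [key, value] of Object.entries(parameters)) {
    if (value == null) continue; // unset parameters are skipped entirely
    query += hasQuestionMark ? "&" : "?"; // "?" before the first parameter, "&" after
    hasQuestionMark = true;
    query += `${key}=${value}`; // values are not URL-encoded; fine for the enum/numeric settings used here
  }
  return query;
}

assembleQuery({ model_id: "eleven_monolingual_v1", optimize_streaming_latency: 1 });
// => "?model_id=eleven_monolingual_v1&optimize_streaming_latency=1"
assembleQuery({ optimize_streaming_latency: undefined, output_format: undefined });
// => "" (no stray "?" when every parameter is unset)
```

Because unset parameters are skipped, omitting both new settings leaves the REST URL unchanged from 0.50.0, while the WebSocket URL still carries `model_id` (the model always resolves via `?? defaultModel`).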
@@ -11,6 +11,8 @@ export interface ElevenLabsSpeechModelSettings extends SpeechGenerationModelSett
      };
      voice: string;
      model?: (typeof elevenLabsModels)[number] | (string & {});
+     optimizeStreamingLatency?: 0 | 1 | 2 | 3 | 4;
+     outputFormat?: "mp3_44100" | "pcm_16000" | "pcm_22050" | "pcm_24000" | "pcm_44100";
      voiceSettings?: {
          stability: number;
          similarityBoost: number;
@@ -24,7 +26,10 @@ export interface ElevenLabsSpeechModelSettings extends SpeechGenerationModelSett
  /**
   * Synthesize speech using the ElevenLabs Text to Speech API.
   *
-  * @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
+  * Both regular text-to-speech and full duplex text-to-speech streaming are supported.
+  *
+  * @see https://docs.elevenlabs.io/api-reference/text-to-speech
+  * @see https://docs.elevenlabs.io/api-reference/text-to-speech-websockets
   */
  export declare class ElevenLabsSpeechModel extends AbstractModel<ElevenLabsSpeechModelSettings> implements StreamingSpeechGenerationModel<ElevenLabsSpeechModelSettings> {
      constructor(settings: ElevenLabsSpeechModelSettings);
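
The two new optional settings are now part of the typed settings surface. A minimal construction sketch, assuming the top-level `ElevenLabsSpeechModel` export used in the README (all values illustrative):

```ts
import { ElevenLabsSpeechModel } from "modelfusion";

const model = new ElevenLabsSpeechModel({
  voice: "pNInz6obpgDQGcFmaJgB", // Adam (voice ID from the README example)
  model: "eleven_monolingual_v1", // also the new default when omitted
  optimizeStreamingLatency: 1, // 0 | 1 | 2 | 3 | 4
  outputFormat: "pcm_16000", // or "mp3_44100" | "pcm_22050" | "pcm_24000" | "pcm_44100"
  voiceSettings: { stability: 1, similarityBoost: 0.35 },
});
```

Both settings are forwarded as the `optimize_streaming_latency` and `output_format` query parameters on the REST and WebSocket requests, as the implementation diffs above and below show.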
@@ -12,11 +12,14 @@ const elevenLabsModels = [
      "eleven_multilingual_v1",
      "eleven_monolingual_v1",
  ];
- const defaultModel = "eleven_multilingual_v2";
+ const defaultModel = "eleven_monolingual_v1";
  /**
   * Synthesize speech using the ElevenLabs Text to Speech API.
   *
-  * @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
+  * Both regular text-to-speech and full duplex text-to-speech streaming are supported.
+  *
+  * @see https://docs.elevenlabs.io/api-reference/text-to-speech
+  * @see https://docs.elevenlabs.io/api-reference/text-to-speech-websockets
   */
  export class ElevenLabsSpeechModel extends AbstractModel {
      constructor(settings) {
@@ -81,7 +84,11 @@ export class ElevenLabsSpeechModel extends AbstractModel {
      ]);
      const queue = new AsyncQueue();
      const model = this.settings.model ?? defaultModel;
- const socket = await createSimpleWebSocket(`wss://api.elevenlabs.io/v1/text-to-speech/${this.settings.voice}/stream-input?model_id=${model}`);
+ const socket = await createSimpleWebSocket(`wss://api.elevenlabs.io/v1/text-to-speech/${this.settings.voice}/stream-input${assembleQuery({
+     model_id: model,
+     optimize_streaming_latency: this.settings.optimizeStreamingLatency,
+     output_format: this.settings.outputFormat,
+ })}`);
  socket.onopen = async () => {
      const api = this.settings.api ?? new ElevenLabsApiConfiguration();
      // send begin-of-stream (BOS) message:
@@ -154,9 +161,12 @@ export class ElevenLabsSpeechModel extends AbstractModel {
      });
  }
  }
- async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfiguration(), abortSignal, text, voiceId, modelId, voiceSettings, }) {
+ async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfiguration(), abortSignal, text, voiceId, modelId, optimizeStreamingLatency, outputFormat, voiceSettings, }) {
      return postJsonToApi({
-         url: api.assembleUrl(`/text-to-speech/${voiceId}`),
+         url: api.assembleUrl(`/text-to-speech/${voiceId}${assembleQuery({
+             optimize_streaming_latency: optimizeStreamingLatency,
+             output_format: outputFormat,
+         })}`),
          headers: api.headers,
          body: {
              text,
@@ -168,6 +178,24 @@ async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfigurat
          abortSignal,
      });
  }
+ function assembleQuery(parameters) {
+     let query = "";
+     let hasQuestionMark = false;
+     for (const [key, value] of Object.entries(parameters)) {
+         if (value == null) {
+             continue;
+         }
+         if (!hasQuestionMark) {
+             query += "?";
+             hasQuestionMark = true;
+         }
+         else {
+             query += "&";
+         }
+         query += `${key}=${value}`;
+     }
+     return query;
+ }
  function toApiVoiceSettings(voiceSettings) {
      return voiceSettings != null
          ? {
@@ -5,7 +5,6 @@ const ApiCallError_js_1 = require("../../core/api/ApiCallError.cjs");
  const failedLmntCallResponseHandler = async ({ response, url, requestBodyValues }) => {
      const responseBody = await response.text();
      try {
-         // TODO implement LmntError
          return new ApiCallError_js_1.ApiCallError({
              message: responseBody,
              statusCode: response.status,
@@ -2,7 +2,6 @@ import { ApiCallError } from "../../core/api/ApiCallError.js";
  export const failedLmntCallResponseHandler = async ({ response, url, requestBodyValues }) => {
      const responseBody = await response.text();
      try {
-         // TODO implement LmntError
          return new ApiCallError({
              message: responseBody,
              statusCode: response.status,
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
    "name": "modelfusion",
    "description": "Build multimodal applications, chatbots, and agents with JavaScript and TypeScript.",
-   "version": "0.50.0",
+   "version": "0.51.0",
    "author": "Lars Grammel",
    "license": "MIT",
    "keywords": [