modelfusion 0.50.0 → 0.51.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -6
- package/model-provider/elevenlabs/ElevenLabsError.cjs +0 -1
- package/model-provider/elevenlabs/ElevenLabsError.js +0 -1
- package/model-provider/elevenlabs/ElevenLabsSpeechModel.cjs +33 -5
- package/model-provider/elevenlabs/ElevenLabsSpeechModel.d.ts +6 -1
- package/model-provider/elevenlabs/ElevenLabsSpeechModel.js +33 -5
- package/model-provider/lmnt/LmntError.cjs +0 -1
- package/model-provider/lmnt/LmntError.js +0 -1
- package/package.json +1 -1
package/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# ModelFusion
|
2
2
|
|
3
|
-
> ###
|
3
|
+
> ### The TypeScript library for building multi-modal AI applications.
|
4
4
|
|
5
5
|
[](https://www.npmjs.com/package/modelfusion)
|
6
6
|
[](https://opensource.org/licenses/MIT)
|
@@ -10,12 +10,9 @@
|
|
10
10
|
|
11
11
|
[Introduction](#introduction) | [Quick Install](#quick-install) | [Usage](#usage-examples) | [Documentation](#documentation) | [Examples](#more-examples) | [Contributing](#contributing) | [modelfusion.dev](https://modelfusion.dev)
|
12
12
|
|
13
|
-
> [!NOTE]
|
14
|
-
> ModelFusion is in its initial development phase. Until version 1.0 there may be breaking changes, because I am still exploring the API design. Feedback and suggestions are welcome.
|
15
|
-
|
16
13
|
## Introduction
|
17
14
|
|
18
|
-
ModelFusion is a library for building AI applications, chatbots, and agents.
|
15
|
+
**ModelFusion** is a TypeScript library for building AI applications, chatbots, and agents.
|
19
16
|
|
20
17
|
- **Multimodal**: ModelFusion supports a wide range of models including text generation, image generation, text-to-speech, speech-to-text, and embedding models.
|
21
18
|
- **Streaming**: ModelFusion supports streaming for many generation models, e.g. text streaming, structure streaming, and full duplex speech streaming.
|
@@ -26,6 +23,9 @@ ModelFusion is a library for building AI applications, chatbots, and agents. Her
|
|
26
23
|
|
27
24
|
## Quick Install
|
28
25
|
|
26
|
+
> [!NOTE]
|
27
|
+
> ModelFusion is in its initial development phase. The main API is now mostly stable, but until version 1.0 there may be minor breaking changes. Feedback and suggestions are welcome.
|
28
|
+
|
29
29
|
```sh
|
30
30
|
npm install modelfusion
|
31
31
|
```
|
@@ -118,7 +118,7 @@ const textStream = await streamText(/* ... */);
|
|
118
118
|
const speechStream = await streamSpeech(
|
119
119
|
new ElevenLabsSpeechModel({
|
120
120
|
voice: "pNInz6obpgDQGcFmaJgB", // Adam
|
121
|
-
|
121
|
+
optimizeStreamingLatency: 1,
|
122
122
|
voiceSettings: { stability: 1, similarityBoost: 0.35 },
|
123
123
|
generationConfig: {
|
124
124
|
chunkLengthSchedule: [50, 90, 120, 150, 200],
|
@@ -5,7 +5,6 @@ const ApiCallError_js_1 = require("../../core/api/ApiCallError.cjs");
|
|
5
5
|
const failedElevenLabsCallResponseHandler = async ({ response, url, requestBodyValues }) => {
|
6
6
|
const responseBody = await response.text();
|
7
7
|
try {
|
8
|
-
// TODO implement ElevenLabsError
|
9
8
|
return new ApiCallError_js_1.ApiCallError({
|
10
9
|
message: responseBody,
|
11
10
|
statusCode: response.status,
|
@@ -2,7 +2,6 @@ import { ApiCallError } from "../../core/api/ApiCallError.js";
|
|
2
2
|
export const failedElevenLabsCallResponseHandler = async ({ response, url, requestBodyValues }) => {
|
3
3
|
const responseBody = await response.text();
|
4
4
|
try {
|
5
|
-
// TODO implement ElevenLabsError
|
6
5
|
return new ApiCallError({
|
7
6
|
message: responseBody,
|
8
7
|
statusCode: response.status,
|
@@ -15,11 +15,14 @@ const elevenLabsModels = [
|
|
15
15
|
"eleven_multilingual_v1",
|
16
16
|
"eleven_monolingual_v1",
|
17
17
|
];
|
18
|
-
const defaultModel = "
|
18
|
+
const defaultModel = "eleven_monolingual_v1";
|
19
19
|
/**
|
20
20
|
* Synthesize speech using the ElevenLabs Text to Speech API.
|
21
21
|
*
|
22
|
-
*
|
22
|
+
* Both regular text-to-speech and full duplex text-to-speech streaming are supported.
|
23
|
+
*
|
24
|
+
* @see https://docs.elevenlabs.io/api-reference/text-to-speech
|
25
|
+
* @see https://docs.elevenlabs.io/api-reference/text-to-speech-websockets
|
23
26
|
*/
|
24
27
|
class ElevenLabsSpeechModel extends AbstractModel_js_1.AbstractModel {
|
25
28
|
constructor(settings) {
|
@@ -84,7 +87,11 @@ class ElevenLabsSpeechModel extends AbstractModel_js_1.AbstractModel {
|
|
84
87
|
]);
|
85
88
|
const queue = new AsyncQueue_js_1.AsyncQueue();
|
86
89
|
const model = this.settings.model ?? defaultModel;
|
87
|
-
const socket = await (0, SimpleWebSocket_js_1.createSimpleWebSocket)(`wss://api.elevenlabs.io/v1/text-to-speech/${this.settings.voice}/stream-input
|
90
|
+
const socket = await (0, SimpleWebSocket_js_1.createSimpleWebSocket)(`wss://api.elevenlabs.io/v1/text-to-speech/${this.settings.voice}/stream-input${assembleQuery({
|
91
|
+
model_id: model,
|
92
|
+
optimize_streaming_latency: this.settings.optimizeStreamingLatency,
|
93
|
+
output_format: this.settings.outputFormat,
|
94
|
+
})}`);
|
88
95
|
socket.onopen = async () => {
|
89
96
|
const api = this.settings.api ?? new ElevenLabsApiConfiguration_js_1.ElevenLabsApiConfiguration();
|
90
97
|
// send begin-of-stream (BOS) message:
|
@@ -158,9 +165,12 @@ class ElevenLabsSpeechModel extends AbstractModel_js_1.AbstractModel {
|
|
158
165
|
}
|
159
166
|
}
|
160
167
|
exports.ElevenLabsSpeechModel = ElevenLabsSpeechModel;
|
161
|
-
async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfiguration_js_1.ElevenLabsApiConfiguration(), abortSignal, text, voiceId, modelId, voiceSettings, }) {
|
168
|
+
async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfiguration_js_1.ElevenLabsApiConfiguration(), abortSignal, text, voiceId, modelId, optimizeStreamingLatency, outputFormat, voiceSettings, }) {
|
162
169
|
return (0, postToApi_js_1.postJsonToApi)({
|
163
|
-
url: api.assembleUrl(`/text-to-speech/${voiceId}
|
170
|
+
url: api.assembleUrl(`/text-to-speech/${voiceId}${assembleQuery({
|
171
|
+
optimize_streaming_latency: optimizeStreamingLatency,
|
172
|
+
output_format: outputFormat,
|
173
|
+
})}`),
|
164
174
|
headers: api.headers,
|
165
175
|
body: {
|
166
176
|
text,
|
@@ -172,6 +182,24 @@ async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfigurat
|
|
172
182
|
abortSignal,
|
173
183
|
});
|
174
184
|
}
|
185
|
+
/**
 * Builds a URL query string from a flat parameter object.
 *
 * Entries whose value is `null` or `undefined` are skipped. Keys and values
 * are percent-encoded so that reserved characters such as `&`, `=`, `#`, or
 * spaces inside a value (for example a custom model id) cannot corrupt the
 * URL the query is appended to.
 *
 * @param {Record<string, unknown>} parameters - Query parameters keyed by name.
 * @returns {string} `""` when no parameter is set; otherwise a string that
 *   starts with `?` and joins the `key=value` pairs with `&`.
 */
function assembleQuery(parameters) {
    const pairs = Object.entries(parameters)
        // `!= null` drops both null and undefined while keeping 0 and "".
        .filter(([, value]) => value != null)
        .map(([key, value]) => `${encodeURIComponent(key)}=${encodeURIComponent(String(value))}`);
    return pairs.length > 0 ? `?${pairs.join("&")}` : "";
}
|
175
203
|
function toApiVoiceSettings(voiceSettings) {
|
176
204
|
return voiceSettings != null
|
177
205
|
? {
|
@@ -11,6 +11,8 @@ export interface ElevenLabsSpeechModelSettings extends SpeechGenerationModelSett
|
|
11
11
|
};
|
12
12
|
voice: string;
|
13
13
|
model?: (typeof elevenLabsModels)[number] | (string & {});
|
14
|
+
optimizeStreamingLatency?: 0 | 1 | 2 | 3 | 4;
|
15
|
+
outputFormat?: "mp3_44100" | "pcm_16000" | "pcm_22050" | "pcm_24000" | "pcm_44100";
|
14
16
|
voiceSettings?: {
|
15
17
|
stability: number;
|
16
18
|
similarityBoost: number;
|
@@ -24,7 +26,10 @@ export interface ElevenLabsSpeechModelSettings extends SpeechGenerationModelSett
|
|
24
26
|
/**
|
25
27
|
* Synthesize speech using the ElevenLabs Text to Speech API.
|
26
28
|
*
|
27
|
-
*
|
29
|
+
* Both regular text-to-speech and full duplex text-to-speech streaming are supported.
|
30
|
+
*
|
31
|
+
* @see https://docs.elevenlabs.io/api-reference/text-to-speech
|
32
|
+
* @see https://docs.elevenlabs.io/api-reference/text-to-speech-websockets
|
28
33
|
*/
|
29
34
|
export declare class ElevenLabsSpeechModel extends AbstractModel<ElevenLabsSpeechModelSettings> implements StreamingSpeechGenerationModel<ElevenLabsSpeechModelSettings> {
|
30
35
|
constructor(settings: ElevenLabsSpeechModelSettings);
|
@@ -12,11 +12,14 @@ const elevenLabsModels = [
|
|
12
12
|
"eleven_multilingual_v1",
|
13
13
|
"eleven_monolingual_v1",
|
14
14
|
];
|
15
|
-
const defaultModel = "
|
15
|
+
const defaultModel = "eleven_monolingual_v1";
|
16
16
|
/**
|
17
17
|
* Synthesize speech using the ElevenLabs Text to Speech API.
|
18
18
|
*
|
19
|
-
*
|
19
|
+
* Both regular text-to-speech and full duplex text-to-speech streaming are supported.
|
20
|
+
*
|
21
|
+
* @see https://docs.elevenlabs.io/api-reference/text-to-speech
|
22
|
+
* @see https://docs.elevenlabs.io/api-reference/text-to-speech-websockets
|
20
23
|
*/
|
21
24
|
export class ElevenLabsSpeechModel extends AbstractModel {
|
22
25
|
constructor(settings) {
|
@@ -81,7 +84,11 @@ export class ElevenLabsSpeechModel extends AbstractModel {
|
|
81
84
|
]);
|
82
85
|
const queue = new AsyncQueue();
|
83
86
|
const model = this.settings.model ?? defaultModel;
|
84
|
-
const socket = await createSimpleWebSocket(`wss://api.elevenlabs.io/v1/text-to-speech/${this.settings.voice}/stream-input
|
87
|
+
const socket = await createSimpleWebSocket(`wss://api.elevenlabs.io/v1/text-to-speech/${this.settings.voice}/stream-input${assembleQuery({
|
88
|
+
model_id: model,
|
89
|
+
optimize_streaming_latency: this.settings.optimizeStreamingLatency,
|
90
|
+
output_format: this.settings.outputFormat,
|
91
|
+
})}`);
|
85
92
|
socket.onopen = async () => {
|
86
93
|
const api = this.settings.api ?? new ElevenLabsApiConfiguration();
|
87
94
|
// send begin-of-stream (BOS) message:
|
@@ -154,9 +161,12 @@ export class ElevenLabsSpeechModel extends AbstractModel {
|
|
154
161
|
});
|
155
162
|
}
|
156
163
|
}
|
157
|
-
async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfiguration(), abortSignal, text, voiceId, modelId, voiceSettings, }) {
|
164
|
+
async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfiguration(), abortSignal, text, voiceId, modelId, optimizeStreamingLatency, outputFormat, voiceSettings, }) {
|
158
165
|
return postJsonToApi({
|
159
|
-
url: api.assembleUrl(`/text-to-speech/${voiceId}
|
166
|
+
url: api.assembleUrl(`/text-to-speech/${voiceId}${assembleQuery({
|
167
|
+
optimize_streaming_latency: optimizeStreamingLatency,
|
168
|
+
output_format: outputFormat,
|
169
|
+
})}`),
|
160
170
|
headers: api.headers,
|
161
171
|
body: {
|
162
172
|
text,
|
@@ -168,6 +178,24 @@ async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfigurat
|
|
168
178
|
abortSignal,
|
169
179
|
});
|
170
180
|
}
|
181
|
+
/**
 * Builds a URL query string from a flat parameter object.
 *
 * Entries whose value is `null` or `undefined` are skipped. Keys and values
 * are percent-encoded so that reserved characters such as `&`, `=`, `#`, or
 * spaces inside a value (for example a custom model id) cannot corrupt the
 * URL the query is appended to.
 *
 * @param {Record<string, unknown>} parameters - Query parameters keyed by name.
 * @returns {string} `""` when no parameter is set; otherwise a string that
 *   starts with `?` and joins the `key=value` pairs with `&`.
 */
function assembleQuery(parameters) {
    const pairs = Object.entries(parameters)
        // `!= null` drops both null and undefined while keeping 0 and "".
        .filter(([, value]) => value != null)
        .map(([key, value]) => `${encodeURIComponent(key)}=${encodeURIComponent(String(value))}`);
    return pairs.length > 0 ? `?${pairs.join("&")}` : "";
}
|
171
199
|
function toApiVoiceSettings(voiceSettings) {
|
172
200
|
return voiceSettings != null
|
173
201
|
? {
|
@@ -5,7 +5,6 @@ const ApiCallError_js_1 = require("../../core/api/ApiCallError.cjs");
|
|
5
5
|
const failedLmntCallResponseHandler = async ({ response, url, requestBodyValues }) => {
|
6
6
|
const responseBody = await response.text();
|
7
7
|
try {
|
8
|
-
// TODO implement LmntError
|
9
8
|
return new ApiCallError_js_1.ApiCallError({
|
10
9
|
message: responseBody,
|
11
10
|
statusCode: response.status,
|
@@ -2,7 +2,6 @@ import { ApiCallError } from "../../core/api/ApiCallError.js";
|
|
2
2
|
export const failedLmntCallResponseHandler = async ({ response, url, requestBodyValues }) => {
|
3
3
|
const responseBody = await response.text();
|
4
4
|
try {
|
5
|
-
// TODO implement LmntError
|
6
5
|
return new ApiCallError({
|
7
6
|
message: responseBody,
|
8
7
|
statusCode: response.status,
|
package/package.json
CHANGED