modelfusion 0.17.0 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -14
- package/model-function/synthesize-speech/synthesizeSpeech.d.ts +3 -0
- package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.cjs +12 -1
- package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.d.ts +7 -0
- package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.js +12 -1
- package/model-provider/openai/OpenAITextGenerationModel.cjs +8 -0
- package/model-provider/openai/OpenAITextGenerationModel.d.ts +10 -2
- package/model-provider/openai/OpenAITextGenerationModel.js +8 -0
- package/model-provider/openai/TikTokenTokenizer.cjs +5 -1
- package/model-provider/openai/TikTokenTokenizer.js +5 -1
- package/package.json +1 -1
package/README.md
CHANGED
@@ -20,7 +20,7 @@ ModelFusion is a library for building AI apps, chatbots, and agents. It provides
|
|
20
20
|
- **Type inference and validation**: ModelFusion uses TypeScript and [Zod](https://github.com/colinhacks/zod) to infer types wherever possible and to validate model responses.
|
21
21
|
- **Flexibility and control**: AI application development can be complex and unique to each project. With ModelFusion, you have complete control over the prompts and model settings, and you can access the raw responses from the models quickly to build what you need.
|
22
22
|
- **No chains and predefined prompts**: Use the concepts provided by JavaScript (variables, functions, etc.) and explicit prompts to build applications you can easily understand and control. Not black magic.
|
23
|
-
- **
|
23
|
+
- **Multimodal Support**: Beyond just LLMs, ModelFusion encompasses a diverse array of models including text generation, text-to-speech, speech-to-text, and image generation, allowing you to build multifaceted AI applications with ease.
|
24
24
|
- **Integrated support features**: Essential features like logging, retries, throttling, tracing, and error handling are built-in, helping you focus more on building your application.
|
25
25
|
|
26
26
|
## Quick Install
|
@@ -274,7 +274,7 @@ const transcription = await transcribe(
|
|
274
274
|
);
|
275
275
|
```
|
276
276
|
|
277
|
-
### Synthesize Speech
|
277
|
+
### [Synthesize Speech](https://modelfusion.dev/guide/function/synthesize-speech)
|
278
278
|
|
279
279
|
Turn text into speech (audio).
|
280
280
|
|
@@ -371,6 +371,7 @@ const { chunks } = await retrieveTextChunks(
|
|
371
371
|
- [Embed Text](https://modelfusion.dev/guide/function/embed-text)
|
372
372
|
- [Tokenize Text](https://modelfusion.dev/guide/function/tokenize-text)
|
373
373
|
- [Transcribe Speech](https://modelfusion.dev/guide/function/transcribe-speech)
|
374
|
+
- [Synthesize Speech](https://modelfusion.dev/guide/function/synthesize-speech)
|
374
375
|
- [Generate images](https://modelfusion.dev/guide/function/generate-image)
|
375
376
|
- Summarize text
|
376
377
|
- [Tools](https://modelfusion.dev/guide/tools)
|
@@ -389,18 +390,30 @@ const { chunks } = await retrieveTextChunks(
|
|
389
390
|
|
390
391
|
### Model Providers
|
391
392
|
|
392
|
-
|
393
|
-
|
394
|
-
|
|
395
|
-
|
|
396
|
-
| [
|
397
|
-
| [
|
398
|
-
| [Generate JSON
|
399
|
-
| [
|
400
|
-
| [
|
401
|
-
| [
|
402
|
-
|
403
|
-
|
393
|
+
#### Text and JSON Generation
|
394
|
+
|
395
|
+
| | [OpenAI](https://modelfusion.dev/integration/model-provider/openai) | [Cohere](https://modelfusion.dev/integration/model-provider/cohere) | [Llama.cpp](https://modelfusion.dev/integration/model-provider/llamacpp) | [Hugging Face](https://modelfusion.dev/integration/model-provider/huggingface) |
|
396
|
+
| ------------------------------------------------------------------------------------- | ------------------------------------------------------------------- | ------------------------------------------------------------------- | ------------------------------------------------------------------------ | ------------------------------------------------------------------------------ |
|
397
|
+
| [Generate text](https://modelfusion.dev/guide/function/generate-text) | ✅ | ✅ | ✅ | ✅ |
|
398
|
+
| [Stream text](https://modelfusion.dev/guide/function/generate-text) | ✅ | ✅ | ✅ | |
|
399
|
+
| [Generate JSON](https://modelfusion.dev/guide/function/generate-json) | chat models | | | |
|
400
|
+
| [Generate JSON or Text](https://modelfusion.dev/guide/function/generate-json-or-text) | chat models | | | |
|
401
|
+
| [Embed text](https://modelfusion.dev/guide/function/embed-text) | ✅ | ✅ | ✅ | ✅ |
|
402
|
+
| [Tokenize text](https://modelfusion.dev/guide/function/tokenize-text) | full | full | basic | |
|
403
|
+
|
404
|
+
#### Image Generation
|
405
|
+
|
406
|
+
- [OpenAI (Dall·E)](https://modelfusion.dev/integration/model-provider/openai)
|
407
|
+
- [Stability AI](https://modelfusion.dev/integration/model-provider/stability)
|
408
|
+
- [Automatic1111](https://modelfusion.dev/integration/model-provider/automatic1111)
|
409
|
+
|
410
|
+
#### Speech Transcription
|
411
|
+
|
412
|
+
- [OpenAI (Whisper)](https://modelfusion.dev/integration/model-provider/openai)
|
413
|
+
|
414
|
+
#### Speech Synthesis
|
415
|
+
|
416
|
+
- [Eleven Labs](https://modelfusion.dev/integration/model-provider/elevenlabs)
|
404
417
|
|
405
418
|
### Vector Indices
|
406
419
|
|
package/model-function/synthesize-speech/synthesizeSpeech.d.ts
CHANGED
@@ -2,6 +2,9 @@
|
|
2
2
|
import { FunctionOptions } from "../FunctionOptions.js";
|
3
3
|
import { CallMetadata } from "../executeCall.js";
|
4
4
|
import { SpeechSynthesisModel, SpeechSynthesisModelSettings } from "./SpeechSynthesisModel.js";
|
5
|
+
/**
|
6
|
+
* Synthesizes speech from text.
|
7
|
+
*/
|
5
8
|
export declare function synthesizeSpeech<SETTINGS extends SpeechSynthesisModelSettings>(model: SpeechSynthesisModel<SETTINGS>, text: string, options: FunctionOptions<SETTINGS> & {
|
6
9
|
fullResponse: true;
|
7
10
|
}): Promise<{
|
package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.cjs
CHANGED
@@ -44,6 +44,8 @@ class ElevenLabsSpeechSynthesisModel extends AbstractModel_js_1.AbstractModel {
|
|
44
44
|
apiKey: this.apiKey,
|
45
45
|
text,
|
46
46
|
voiceId: combinedSettings.voice,
|
47
|
+
modelId: combinedSettings.model,
|
48
|
+
voiceSettings: combinedSettings.voiceSettings,
|
47
49
|
}),
|
48
50
|
});
|
49
51
|
}
|
@@ -61,7 +63,7 @@ exports.ElevenLabsSpeechSynthesisModel = ElevenLabsSpeechSynthesisModel;
|
|
61
63
|
/**
|
62
64
|
* @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
|
63
65
|
*/
|
64
|
-
async function callElevenLabsTextToSpeechAPI({ baseUrl = "https://api.elevenlabs.io/v1", abortSignal, apiKey, text, voiceId, }) {
|
66
|
+
async function callElevenLabsTextToSpeechAPI({ baseUrl = "https://api.elevenlabs.io/v1", abortSignal, apiKey, text, voiceId, modelId, voiceSettings, }) {
|
65
67
|
return (0, postToApi_js_1.postJsonToApi)({
|
66
68
|
url: `${baseUrl}/text-to-speech/${voiceId}`,
|
67
69
|
headers: {
|
@@ -69,6 +71,15 @@ async function callElevenLabsTextToSpeechAPI({ baseUrl = "https://api.elevenlabs
|
|
69
71
|
},
|
70
72
|
body: {
|
71
73
|
text,
|
74
|
+
model_id: modelId,
|
75
|
+
voice_settings: voiceSettings != null
|
76
|
+
? {
|
77
|
+
stability: voiceSettings.stability,
|
78
|
+
similarity_boost: voiceSettings.similarityBoost,
|
79
|
+
style: voiceSettings.style,
|
80
|
+
use_speaker_boost: voiceSettings.useSpeakerBoost,
|
81
|
+
}
|
82
|
+
: undefined,
|
72
83
|
},
|
73
84
|
failedResponseHandler: ElevenLabsError_js_1.failedElevenLabsCallResponseHandler,
|
74
85
|
successfulResponseHandler: (0, postToApi_js_1.createAudioMpegResponseHandler)(),
|
package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.d.ts
CHANGED
@@ -8,6 +8,13 @@ export interface ElevenLabsSpeechSynthesisModelSettings extends SpeechSynthesisM
|
|
8
8
|
voice: string;
|
9
9
|
baseUrl?: string;
|
10
10
|
apiKey?: string;
|
11
|
+
model?: string;
|
12
|
+
voiceSettings?: {
|
13
|
+
stability: number;
|
14
|
+
similarityBoost: number;
|
15
|
+
style?: number;
|
16
|
+
useSpeakerBoost?: boolean;
|
17
|
+
};
|
11
18
|
retry?: RetryFunction;
|
12
19
|
throttle?: ThrottleFunction;
|
13
20
|
}
|
package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.js
CHANGED
@@ -41,6 +41,8 @@ export class ElevenLabsSpeechSynthesisModel extends AbstractModel {
|
|
41
41
|
apiKey: this.apiKey,
|
42
42
|
text,
|
43
43
|
voiceId: combinedSettings.voice,
|
44
|
+
modelId: combinedSettings.model,
|
45
|
+
voiceSettings: combinedSettings.voiceSettings,
|
44
46
|
}),
|
45
47
|
});
|
46
48
|
}
|
@@ -57,7 +59,7 @@ export class ElevenLabsSpeechSynthesisModel extends AbstractModel {
|
|
57
59
|
/**
|
58
60
|
* @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
|
59
61
|
*/
|
60
|
-
async function callElevenLabsTextToSpeechAPI({ baseUrl = "https://api.elevenlabs.io/v1", abortSignal, apiKey, text, voiceId, }) {
|
62
|
+
async function callElevenLabsTextToSpeechAPI({ baseUrl = "https://api.elevenlabs.io/v1", abortSignal, apiKey, text, voiceId, modelId, voiceSettings, }) {
|
61
63
|
return postJsonToApi({
|
62
64
|
url: `${baseUrl}/text-to-speech/${voiceId}`,
|
63
65
|
headers: {
|
@@ -65,6 +67,15 @@ async function callElevenLabsTextToSpeechAPI({ baseUrl = "https://api.elevenlabs
|
|
65
67
|
},
|
66
68
|
body: {
|
67
69
|
text,
|
70
|
+
model_id: modelId,
|
71
|
+
voice_settings: voiceSettings != null
|
72
|
+
? {
|
73
|
+
stability: voiceSettings.stability,
|
74
|
+
similarity_boost: voiceSettings.similarityBoost,
|
75
|
+
style: voiceSettings.style,
|
76
|
+
use_speaker_boost: voiceSettings.useSpeakerBoost,
|
77
|
+
}
|
78
|
+
: undefined,
|
68
79
|
},
|
69
80
|
failedResponseHandler: failedElevenLabsCallResponseHandler,
|
70
81
|
successfulResponseHandler: createAudioMpegResponseHandler(),
|
package/model-provider/openai/OpenAITextGenerationModel.cjs
CHANGED
@@ -20,6 +20,14 @@ const TikTokenTokenizer_js_1 = require("./TikTokenTokenizer.cjs");
|
|
20
20
|
* @see https://openai.com/pricing
|
21
21
|
*/
|
22
22
|
exports.OPENAI_TEXT_GENERATION_MODELS = {
|
23
|
+
"davinci-002": {
|
24
|
+
contextWindowSize: 16384,
|
25
|
+
tokenCostInMillicents: 0.2,
|
26
|
+
},
|
27
|
+
"babbage-002": {
|
28
|
+
contextWindowSize: 16384,
|
29
|
+
tokenCostInMillicents: 0.04,
|
30
|
+
},
|
23
31
|
"text-davinci-003": {
|
24
32
|
contextWindowSize: 4096,
|
25
33
|
tokenCostInMillicents: 2,
|
package/model-provider/openai/OpenAITextGenerationModel.d.ts
CHANGED
@@ -16,6 +16,14 @@ import { TikTokenTokenizer } from "./TikTokenTokenizer.js";
|
|
16
16
|
* @see https://openai.com/pricing
|
17
17
|
*/
|
18
18
|
export declare const OPENAI_TEXT_GENERATION_MODELS: {
|
19
|
+
"davinci-002": {
|
20
|
+
contextWindowSize: number;
|
21
|
+
tokenCostInMillicents: number;
|
22
|
+
};
|
23
|
+
"babbage-002": {
|
24
|
+
contextWindowSize: number;
|
25
|
+
tokenCostInMillicents: number;
|
26
|
+
};
|
19
27
|
"text-davinci-003": {
|
20
28
|
contextWindowSize: number;
|
21
29
|
tokenCostInMillicents: number;
|
@@ -58,7 +66,7 @@ export declare const OPENAI_TEXT_GENERATION_MODELS: {
|
|
58
66
|
};
|
59
67
|
};
|
60
68
|
export type OpenAITextGenerationModelType = keyof typeof OPENAI_TEXT_GENERATION_MODELS;
|
61
|
-
export declare const isOpenAITextGenerationModel: (model: string) => model is "text-davinci-003" | "text-davinci-002" | "code-davinci-002" | "davinci" | "text-curie-001" | "curie" | "text-babbage-001" | "babbage" | "text-ada-001" | "ada";
|
69
|
+
export declare const isOpenAITextGenerationModel: (model: string) => model is "davinci-002" | "babbage-002" | "text-davinci-003" | "text-davinci-002" | "code-davinci-002" | "davinci" | "text-curie-001" | "curie" | "text-babbage-001" | "babbage" | "text-ada-001" | "ada";
|
62
70
|
export declare const calculateOpenAITextGenerationCostInMillicents: ({ model, response, }: {
|
63
71
|
model: OpenAITextGenerationModelType;
|
64
72
|
response: OpenAITextGenerationResponse;
|
@@ -102,7 +110,7 @@ export interface OpenAITextGenerationModelSettings extends TextGenerationModelSe
|
|
102
110
|
export declare class OpenAITextGenerationModel extends AbstractModel<OpenAITextGenerationModelSettings> implements TextGenerationModel<string, OpenAITextGenerationResponse, OpenAITextGenerationDelta, OpenAITextGenerationModelSettings> {
|
103
111
|
constructor(settings: OpenAITextGenerationModelSettings);
|
104
112
|
readonly provider: "openai";
|
105
|
-
get modelName(): "text-davinci-003" | "text-davinci-002" | "code-davinci-002" | "davinci" | "text-curie-001" | "curie" | "text-babbage-001" | "babbage" | "text-ada-001" | "ada";
|
113
|
+
get modelName(): "davinci-002" | "babbage-002" | "text-davinci-003" | "text-davinci-002" | "code-davinci-002" | "davinci" | "text-curie-001" | "curie" | "text-babbage-001" | "babbage" | "text-ada-001" | "ada";
|
106
114
|
readonly contextWindowSize: number;
|
107
115
|
readonly tokenizer: TikTokenTokenizer;
|
108
116
|
private get apiKey();
|
package/model-provider/openai/OpenAITextGenerationModel.js
CHANGED
@@ -14,6 +14,14 @@ import { TikTokenTokenizer } from "./TikTokenTokenizer.js";
|
|
14
14
|
* @see https://openai.com/pricing
|
15
15
|
*/
|
16
16
|
export const OPENAI_TEXT_GENERATION_MODELS = {
|
17
|
+
"davinci-002": {
|
18
|
+
contextWindowSize: 16384,
|
19
|
+
tokenCostInMillicents: 0.2,
|
20
|
+
},
|
21
|
+
"babbage-002": {
|
22
|
+
contextWindowSize: 16384,
|
23
|
+
tokenCostInMillicents: 0.04,
|
24
|
+
},
|
17
25
|
"text-davinci-003": {
|
18
26
|
contextWindowSize: 4096,
|
19
27
|
tokenCostInMillicents: 2,
|
package/model-provider/openai/TikTokenTokenizer.cjs
CHANGED
@@ -57,13 +57,17 @@ function getEncodingNameForModel(model) {
|
|
57
57
|
case "text-davinci-003": {
|
58
58
|
return "p50k_base";
|
59
59
|
}
|
60
|
+
case "babbage-002":
|
61
|
+
case "davinci-002":
|
60
62
|
case "ada":
|
61
63
|
case "babbage":
|
62
64
|
case "curie":
|
63
65
|
case "davinci":
|
64
66
|
case "text-ada-001":
|
65
67
|
case "text-babbage-001":
|
66
|
-
case "text-curie-001":
|
68
|
+
case "text-curie-001": {
|
69
|
+
return "r50k_base";
|
70
|
+
}
|
67
71
|
case "gpt-3.5-turbo":
|
68
72
|
case "gpt-3.5-turbo-0301":
|
69
73
|
case "gpt-3.5-turbo-0613":
|
package/model-provider/openai/TikTokenTokenizer.js
CHANGED
@@ -53,13 +53,17 @@ function getEncodingNameForModel(model) {
|
|
53
53
|
case "text-davinci-003": {
|
54
54
|
return "p50k_base";
|
55
55
|
}
|
56
|
+
case "babbage-002":
|
57
|
+
case "davinci-002":
|
56
58
|
case "ada":
|
57
59
|
case "babbage":
|
58
60
|
case "curie":
|
59
61
|
case "davinci":
|
60
62
|
case "text-ada-001":
|
61
63
|
case "text-babbage-001":
|
62
|
-
case "text-curie-001":
|
64
|
+
case "text-curie-001": {
|
65
|
+
return "r50k_base";
|
66
|
+
}
|
63
67
|
case "gpt-3.5-turbo":
|
64
68
|
case "gpt-3.5-turbo-0301":
|
65
69
|
case "gpt-3.5-turbo-0613":
|