modelfusion 0.16.0 → 0.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +43 -16
- package/model-function/ModelCallEvent.d.ts +4 -3
- package/model-function/SuccessfulModelCall.cjs +1 -0
- package/model-function/SuccessfulModelCall.d.ts +1 -1
- package/model-function/SuccessfulModelCall.js +1 -0
- package/model-function/index.cjs +6 -3
- package/model-function/index.d.ts +6 -3
- package/model-function/index.js +6 -3
- package/model-function/synthesize-speech/SpeechSynthesisEvent.d.ts +22 -0
- package/model-function/synthesize-speech/SpeechSynthesisModel.d.ts +11 -0
- package/model-function/synthesize-speech/synthesizeSpeech.cjs +49 -0
- package/model-function/synthesize-speech/synthesizeSpeech.d.ts +16 -0
- package/model-function/synthesize-speech/synthesizeSpeech.js +45 -0
- package/model-function/transcribe-speech/TranscriptionEvent.cjs +2 -0
- package/model-function/transcribe-speech/TranscriptionEvent.js +1 -0
- package/model-function/transcribe-speech/TranscriptionModel.cjs +2 -0
- package/model-function/transcribe-speech/TranscriptionModel.js +1 -0
- package/model-provider/elevenlabs/ElevenLabsError.cjs +31 -0
- package/model-provider/elevenlabs/ElevenLabsError.d.ts +3 -0
- package/model-provider/elevenlabs/ElevenLabsError.js +27 -0
- package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.cjs +88 -0
- package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.d.ts +29 -0
- package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.js +84 -0
- package/model-provider/elevenlabs/index.cjs +17 -0
- package/model-provider/elevenlabs/index.d.ts +1 -0
- package/model-provider/elevenlabs/index.js +1 -0
- package/model-provider/index.cjs +1 -0
- package/model-provider/index.d.ts +1 -0
- package/model-provider/index.js +1 -0
- package/model-provider/openai/OpenAITranscriptionModel.d.ts +1 -1
- package/package.json +1 -1
- package/util/api/postToApi.cjs +14 -1
- package/util/api/postToApi.d.ts +2 -0
- package/util/api/postToApi.js +12 -0
- /package/model-function/{transcribe-audio/TranscriptionEvent.cjs → synthesize-speech/SpeechSynthesisEvent.cjs} +0 -0
- /package/model-function/{transcribe-audio/TranscriptionEvent.js → synthesize-speech/SpeechSynthesisEvent.js} +0 -0
- /package/model-function/{transcribe-audio/TranscriptionModel.cjs → synthesize-speech/SpeechSynthesisModel.cjs} +0 -0
- /package/model-function/{transcribe-audio/TranscriptionModel.js → synthesize-speech/SpeechSynthesisModel.js} +0 -0
- /package/model-function/{transcribe-audio → transcribe-speech}/TranscriptionEvent.d.ts +0 -0
- /package/model-function/{transcribe-audio → transcribe-speech}/TranscriptionModel.d.ts +0 -0
- /package/model-function/{transcribe-audio → transcribe-speech}/transcribe.cjs +0 -0
- /package/model-function/{transcribe-audio → transcribe-speech}/transcribe.d.ts +0 -0
- /package/model-function/{transcribe-audio → transcribe-speech}/transcribe.js +0 -0
package/README.md
CHANGED
@@ -20,7 +20,7 @@ ModelFusion is a library for building AI apps, chatbots, and agents. It provides
|
|
20
20
|
- **Type inference and validation**: ModelFusion uses TypeScript and [Zod](https://github.com/colinhacks/zod) to infer types wherever possible and to validate model responses.
|
21
21
|
- **Flexibility and control**: AI application development can be complex and unique to each project. With ModelFusion, you have complete control over the prompts and model settings, and you can access the raw responses from the models quickly to build what you need.
|
22
22
|
- **No chains and predefined prompts**: Use the concepts provided by JavaScript (variables, functions, etc.) and explicit prompts to build applications you can easily understand and control. Not black magic.
|
23
|
-
- **
|
23
|
+
- **Multimodal Support**: Beyond just LLMs, ModelFusion encompasses a diverse array of models including text generation, text-to-speech, speech-to-text, and image generation, allowing you to build multifaceted AI applications with ease.
|
24
24
|
- **Integrated support features**: Essential features like logging, retries, throttling, tracing, and error handling are built-in, helping you focus more on building your application.
|
25
25
|
|
26
26
|
## Quick Install
|
@@ -260,9 +260,9 @@ const { tool, parameters, result, text } = await useToolOrGenerateText(
|
|
260
260
|
);
|
261
261
|
```
|
262
262
|
|
263
|
-
### [Transcribe
|
263
|
+
### [Transcribe Speech](https://modelfusion.dev/guide/function/transcribe-speech)
|
264
264
|
|
265
|
-
Turn
|
265
|
+
Turn speech (audio) into text.
|
266
266
|
|
267
267
|
```ts
|
268
268
|
const transcription = await transcribe(
|
@@ -274,6 +274,20 @@ const transcription = await transcribe(
|
|
274
274
|
);
|
275
275
|
```
|
276
276
|
|
277
|
+
### [Synthesize Speech](https://modelfusion.dev/guide/function/synthesize-speech)
|
278
|
+
|
279
|
+
Turn text into speech (audio).
|
280
|
+
|
281
|
+
```ts
|
282
|
+
// `speech` is a Buffer with MP3 audio data
|
283
|
+
const speech = await synthesizeSpeech(
|
284
|
+
new ElevenLabsSpeechSynthesisModel({
|
285
|
+
voice: "ErXwobaYiN019PkySvjV",
|
286
|
+
}),
|
287
|
+
"Hello, World!"
|
288
|
+
);
|
289
|
+
```
|
290
|
+
|
277
291
|
### [Generate Image](https://modelfusion.dev/guide/function/generate-image)
|
278
292
|
|
279
293
|
Generate a base64-encoded image from a prompt.
|
@@ -356,7 +370,8 @@ const { chunks } = await retrieveTextChunks(
|
|
356
370
|
- [Generate JSON or text](https://modelfusion.dev/guide/function/generate-json-or-text)
|
357
371
|
- [Embed Text](https://modelfusion.dev/guide/function/embed-text)
|
358
372
|
- [Tokenize Text](https://modelfusion.dev/guide/function/tokenize-text)
|
359
|
-
- [Transcribe
|
373
|
+
- [Transcribe Speech](https://modelfusion.dev/guide/function/transcribe-speech)
|
374
|
+
- [Synthesize Speech](https://modelfusion.dev/guide/function/synthesize-speech)
|
360
375
|
- [Generate images](https://modelfusion.dev/guide/function/generate-image)
|
361
376
|
- Summarize text
|
362
377
|
- [Tools](https://modelfusion.dev/guide/tools)
|
@@ -375,18 +390,30 @@ const { chunks } = await retrieveTextChunks(
|
|
375
390
|
|
376
391
|
### Model Providers
|
377
392
|
|
378
|
-
|
379
|
-
|
380
|
-
|
|
381
|
-
|
|
382
|
-
| [
|
383
|
-
| [
|
384
|
-
| [Generate JSON
|
385
|
-
| [
|
386
|
-
| [
|
387
|
-
| [
|
388
|
-
|
389
|
-
|
393
|
+
#### Text and JSON Generation
|
394
|
+
|
395
|
+
| | [OpenAI](https://modelfusion.dev/integration/model-provider/openai) | [Cohere](https://modelfusion.dev/integration/model-provider/cohere) | [Llama.cpp](https://modelfusion.dev/integration/model-provider/llamacpp) | [Hugging Face](https://modelfusion.dev/integration/model-provider/huggingface) |
|
396
|
+
| ------------------------------------------------------------------------------------- | ------------------------------------------------------------------- | ------------------------------------------------------------------- | ------------------------------------------------------------------------ | ------------------------------------------------------------------------------ |
|
397
|
+
| [Generate text](https://modelfusion.dev/guide/function/generate-text) | ✅ | ✅ | ✅ | ✅ |
|
398
|
+
| [Stream text](https://modelfusion.dev/guide/function/generate-text) | ✅ | ✅ | ✅ | |
|
399
|
+
| [Generate JSON](https://modelfusion.dev/guide/function/generate-json) | chat models | | | |
|
400
|
+
| [Generate JSON or Text](https://modelfusion.dev/guide/function/generate-json-or-text) | chat models | | | |
|
401
|
+
| [Embed text](https://modelfusion.dev/guide/function/embed-text) | ✅ | ✅ | ✅ | ✅ |
|
402
|
+
| [Tokenize text](https://modelfusion.dev/guide/function/tokenize-text) | full | full | basic | |
|
403
|
+
|
404
|
+
#### Image Generation
|
405
|
+
|
406
|
+
- [OpenAI (Dall·E)](https://modelfusion.dev/integration/model-provider/openai)
|
407
|
+
- [Stability AI](https://modelfusion.dev/integration/model-provider/stability)
|
408
|
+
- [Automatic1111](https://modelfusion.dev/integration/model-provider/automatic1111)
|
409
|
+
|
410
|
+
#### Speech Transcription
|
411
|
+
|
412
|
+
- [OpenAI (Whisper)](https://modelfusion.dev/integration/model-provider/openai)
|
413
|
+
|
414
|
+
#### Speech Synthesis
|
415
|
+
|
416
|
+
- [Eleven Labs](https://modelfusion.dev/integration/model-provider/elevenlabs)
|
390
417
|
|
391
418
|
### Vector Indices
|
392
419
|
|
@@ -5,12 +5,13 @@ import { ImageGenerationFinishedEvent, ImageGenerationStartedEvent } from "./gen
|
|
5
5
|
import { JsonGenerationFinishedEvent, JsonGenerationStartedEvent } from "./generate-json/JsonGenerationEvent.js";
|
6
6
|
import { TextGenerationFinishedEvent, TextGenerationStartedEvent } from "./generate-text/TextGenerationEvent.js";
|
7
7
|
import { TextStreamingFinishedEvent, TextStreamingStartedEvent } from "./generate-text/TextStreamingEvent.js";
|
8
|
-
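import { TranscriptionFinishedEvent, TranscriptionStartedEvent } from "./transcribe-audio/TranscriptionEvent.js";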
|
8
|
+
import { SpeechSynthesisFinishedEvent, SpeechSynthesisStartedEvent } from "./synthesize-speech/SpeechSynthesisEvent.js";
|
9
|
+
import { TranscriptionFinishedEvent, TranscriptionStartedEvent } from "./transcribe-speech/TranscriptionEvent.js";
|
9
10
|
export type ModelCallStartedEventMetadata = RunFunctionStartedEventMetadata & {
|
10
11
|
model: ModelInformation;
|
11
12
|
};
|
12
|
-
export type ModelCallStartedEvent = ImageGenerationStartedEvent | JsonGenerationStartedEvent | TextEmbeddingStartedEvent | TextGenerationStartedEvent | TextStreamingStartedEvent | TranscriptionStartedEvent;
|
13
|
+
export type ModelCallStartedEvent = ImageGenerationStartedEvent | JsonGenerationStartedEvent | SpeechSynthesisStartedEvent | TextEmbeddingStartedEvent | TextGenerationStartedEvent | TextStreamingStartedEvent | TranscriptionStartedEvent;
|
13
14
|
export type ModelCallFinishedEventMetadata = RunFunctionFinishedEventMetadata & {
|
14
15
|
model: ModelInformation;
|
15
16
|
};
|
16
|
-
export type ModelCallFinishedEvent = ImageGenerationFinishedEvent | JsonGenerationFinishedEvent | TextEmbeddingFinishedEvent | TextGenerationFinishedEvent | TextStreamingFinishedEvent | TranscriptionFinishedEvent;
|
17
|
+
export type ModelCallFinishedEvent = ImageGenerationFinishedEvent | JsonGenerationFinishedEvent | SpeechSynthesisFinishedEvent | TextEmbeddingFinishedEvent | TextGenerationFinishedEvent | TextStreamingFinishedEvent | TranscriptionFinishedEvent;
|
@@ -18,6 +18,7 @@ const eventTypeToCostType = {
|
|
18
18
|
"image-generation-finished": "image-generation",
|
19
19
|
"json-generation-finished": "json-generation",
|
20
20
|
"json-or-text-generation-finished": "json-or-text-generation",
|
21
|
+
"speech-synthesis-finished": "speech-synthesis",
|
21
22
|
"text-embedding-finished": "text-embedding",
|
22
23
|
"text-generation-finished": "text-generation",
|
23
24
|
"text-streaming-finished": "text-streaming",
|
@@ -1,7 +1,7 @@
|
|
1
1
|
import { RunFunctionEvent } from "../run/RunFunctionEvent.js";
|
2
2
|
import { ModelInformation } from "./ModelInformation.js";
|
3
3
|
export type SuccessfulModelCall = {
|
4
|
-
type: "image-generation" | "json-generation" | "json-or-text-generation" | "text-embedding" | "text-generation" | "text-streaming" | "transcription";
|
4
|
+
type: "image-generation" | "json-generation" | "json-or-text-generation" | "speech-synthesis" | "text-embedding" | "text-generation" | "text-streaming" | "transcription";
|
5
5
|
model: ModelInformation;
|
6
6
|
settings: unknown;
|
7
7
|
response: unknown;
|
@@ -14,6 +14,7 @@ const eventTypeToCostType = {
|
|
14
14
|
"image-generation-finished": "image-generation",
|
15
15
|
"json-generation-finished": "json-generation",
|
16
16
|
"json-or-text-generation-finished": "json-or-text-generation",
|
17
|
+
"speech-synthesis-finished": "speech-synthesis",
|
17
18
|
"text-embedding-finished": "text-embedding",
|
18
19
|
"text-generation-finished": "text-generation",
|
19
20
|
"text-streaming-finished": "text-streaming",
|
package/model-function/index.cjs
CHANGED
@@ -41,8 +41,11 @@ __exportStar(require("./generate-text/TextGenerationEvent.cjs"), exports);
|
|
41
41
|
__exportStar(require("./generate-text/TextGenerationModel.cjs"), exports);
|
42
42
|
__exportStar(require("./generate-text/generateText.cjs"), exports);
|
43
43
|
__exportStar(require("./generate-text/streamText.cjs"), exports);
|
44
|
+
__exportStar(require("./synthesize-speech/SpeechSynthesisEvent.cjs"), exports);
|
45
|
+
__exportStar(require("./synthesize-speech/SpeechSynthesisModel.cjs"), exports);
|
46
|
+
__exportStar(require("./synthesize-speech/synthesizeSpeech.cjs"), exports);
|
44
47
|
__exportStar(require("./tokenize-text/Tokenizer.cjs"), exports);
|
45
48
|
__exportStar(require("./tokenize-text/countTokens.cjs"), exports);
|
46
|
-
__exportStar(require("./transcribe-audio/TranscriptionEvent.cjs"), exports);
|
47
|
-
__exportStar(require("./transcribe-audio/TranscriptionModel.cjs"), exports);
|
48
|
-
__exportStar(require("./transcribe-audio/transcribe.cjs"), exports);
|
49
|
+
__exportStar(require("./transcribe-speech/TranscriptionEvent.cjs"), exports);
|
50
|
+
__exportStar(require("./transcribe-speech/TranscriptionModel.cjs"), exports);
|
51
|
+
__exportStar(require("./transcribe-speech/transcribe.cjs"), exports);
|
@@ -25,8 +25,11 @@ export * from "./generate-text/TextGenerationEvent.js";
|
|
25
25
|
export * from "./generate-text/TextGenerationModel.js";
|
26
26
|
export * from "./generate-text/generateText.js";
|
27
27
|
export * from "./generate-text/streamText.js";
|
28
|
+
export * from "./synthesize-speech/SpeechSynthesisEvent.js";
|
29
|
+
export * from "./synthesize-speech/SpeechSynthesisModel.js";
|
30
|
+
export * from "./synthesize-speech/synthesizeSpeech.js";
|
28
31
|
export * from "./tokenize-text/Tokenizer.js";
|
29
32
|
export * from "./tokenize-text/countTokens.js";
|
30
|
-
export * from "./transcribe-audio/TranscriptionEvent.js";
|
31
|
-
export * from "./transcribe-audio/TranscriptionModel.js";
|
32
|
-
export * from "./transcribe-audio/transcribe.js";
|
33
|
+
export * from "./transcribe-speech/TranscriptionEvent.js";
|
34
|
+
export * from "./transcribe-speech/TranscriptionModel.js";
|
35
|
+
export * from "./transcribe-speech/transcribe.js";
|
package/model-function/index.js
CHANGED
@@ -25,8 +25,11 @@ export * from "./generate-text/TextGenerationEvent.js";
|
|
25
25
|
export * from "./generate-text/TextGenerationModel.js";
|
26
26
|
export * from "./generate-text/generateText.js";
|
27
27
|
export * from "./generate-text/streamText.js";
|
28
|
+
export * from "./synthesize-speech/SpeechSynthesisEvent.js";
|
29
|
+
export * from "./synthesize-speech/SpeechSynthesisModel.js";
|
30
|
+
export * from "./synthesize-speech/synthesizeSpeech.js";
|
28
31
|
export * from "./tokenize-text/Tokenizer.js";
|
29
32
|
export * from "./tokenize-text/countTokens.js";
|
30
|
-
export * from "./transcribe-audio/TranscriptionEvent.js";
|
31
|
-
export * from "./transcribe-audio/TranscriptionModel.js";
|
32
|
-
export * from "./transcribe-audio/transcribe.js";
|
33
|
+
export * from "./transcribe-speech/TranscriptionEvent.js";
|
34
|
+
export * from "./transcribe-speech/TranscriptionModel.js";
|
35
|
+
export * from "./transcribe-speech/transcribe.js";
|
@@ -0,0 +1,22 @@
|
|
1
|
+
/// <reference types="node" resolution-mode="require"/>
|
2
|
+
import { ModelCallFinishedEventMetadata, ModelCallStartedEventMetadata } from "../ModelCallEvent.js";
|
3
|
+
export type SpeechSynthesisStartedEvent = {
|
4
|
+
type: "speech-synthesis-started";
|
5
|
+
metadata: ModelCallStartedEventMetadata;
|
6
|
+
settings: unknown;
|
7
|
+
text: string;
|
8
|
+
};
|
9
|
+
export type SpeechSynthesisFinishedEvent = {
|
10
|
+
type: "speech-synthesis-finished";
|
11
|
+
metadata: ModelCallFinishedEventMetadata;
|
12
|
+
settings: unknown;
|
13
|
+
text: string;
|
14
|
+
} & ({
|
15
|
+
status: "success";
|
16
|
+
response: Buffer;
|
17
|
+
} | {
|
18
|
+
status: "failure";
|
19
|
+
error: unknown;
|
20
|
+
} | {
|
21
|
+
status: "abort";
|
22
|
+
});
|
@@ -0,0 +1,11 @@
|
|
1
|
+
/// <reference types="node" resolution-mode="require"/>
|
2
|
+
import { FunctionOptions } from "../FunctionOptions.js";
|
3
|
+
import { Model, ModelSettings } from "../Model.js";
|
4
|
+
export interface SpeechSynthesisModelSettings extends ModelSettings {
|
5
|
+
}
|
6
|
+
export interface SpeechSynthesisModel<SETTINGS> extends Model<SETTINGS> {
|
7
|
+
/**
|
8
|
+
* Generates an mp3 audio buffer that contains the speech for the given text.
|
9
|
+
*/
|
10
|
+
generateSpeechResponse: (text: string, options?: FunctionOptions<SETTINGS>) => PromiseLike<Buffer>;
|
11
|
+
}
|
@@ -0,0 +1,49 @@
|
|
1
|
+
"use strict";
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
3
|
+
exports.synthesizeSpeech = void 0;
|
4
|
+
const executeCall_js_1 = require("../executeCall.cjs");
|
5
|
+
async function synthesizeSpeech(model, text, options) {
|
6
|
+
const result = await (0, executeCall_js_1.executeCall)({
|
7
|
+
model,
|
8
|
+
options,
|
9
|
+
generateResponse: (options) => model.generateSpeechResponse(text, options),
|
10
|
+
extractOutputValue: (buffer) => buffer,
|
11
|
+
getStartEvent: (metadata, settings) => ({
|
12
|
+
type: "speech-synthesis-started",
|
13
|
+
metadata,
|
14
|
+
settings,
|
15
|
+
text,
|
16
|
+
}),
|
17
|
+
getAbortEvent: (metadata, settings) => ({
|
18
|
+
type: "speech-synthesis-finished",
|
19
|
+
status: "abort",
|
20
|
+
settings,
|
21
|
+
metadata,
|
22
|
+
text,
|
23
|
+
}),
|
24
|
+
getFailureEvent: (metadata, settings, error) => ({
|
25
|
+
type: "speech-synthesis-finished",
|
26
|
+
status: "failure",
|
27
|
+
metadata,
|
28
|
+
settings,
|
29
|
+
text,
|
30
|
+
error,
|
31
|
+
}),
|
32
|
+
getSuccessEvent: (metadata, settings, response, output) => ({
|
33
|
+
type: "speech-synthesis-finished",
|
34
|
+
status: "success",
|
35
|
+
metadata,
|
36
|
+
settings,
|
37
|
+
text,
|
38
|
+
response,
|
39
|
+
speech: output,
|
40
|
+
}),
|
41
|
+
});
|
42
|
+
return options?.fullResponse === true
|
43
|
+
? {
|
44
|
+
speech: result.output,
|
45
|
+
metadata: result.metadata,
|
46
|
+
}
|
47
|
+
: result.output;
|
48
|
+
}
|
49
|
+
exports.synthesizeSpeech = synthesizeSpeech;
|
@@ -0,0 +1,16 @@
|
|
1
|
+
/// <reference types="node" resolution-mode="require"/>
|
2
|
+
import { FunctionOptions } from "../FunctionOptions.js";
|
3
|
+
import { CallMetadata } from "../executeCall.js";
|
4
|
+
import { SpeechSynthesisModel, SpeechSynthesisModelSettings } from "./SpeechSynthesisModel.js";
|
5
|
+
/**
|
6
|
+
* Synthesizes speech from text.
|
7
|
+
*/
|
8
|
+
export declare function synthesizeSpeech<SETTINGS extends SpeechSynthesisModelSettings>(model: SpeechSynthesisModel<SETTINGS>, text: string, options: FunctionOptions<SETTINGS> & {
|
9
|
+
fullResponse: true;
|
10
|
+
}): Promise<{
|
11
|
+
speech: Buffer;
|
12
|
+
metadata: CallMetadata<SpeechSynthesisModel<SETTINGS>>;
|
13
|
+
}>;
|
14
|
+
export declare function synthesizeSpeech<SETTINGS extends SpeechSynthesisModelSettings>(model: SpeechSynthesisModel<SETTINGS>, text: string, options?: FunctionOptions<SETTINGS> & {
|
15
|
+
fullResponse?: false;
|
16
|
+
}): Promise<Buffer>;
|
@@ -0,0 +1,45 @@
|
|
1
|
+
import { executeCall } from "../executeCall.js";
|
2
|
+
export async function synthesizeSpeech(model, text, options) {
|
3
|
+
const result = await executeCall({
|
4
|
+
model,
|
5
|
+
options,
|
6
|
+
generateResponse: (options) => model.generateSpeechResponse(text, options),
|
7
|
+
extractOutputValue: (buffer) => buffer,
|
8
|
+
getStartEvent: (metadata, settings) => ({
|
9
|
+
type: "speech-synthesis-started",
|
10
|
+
metadata,
|
11
|
+
settings,
|
12
|
+
text,
|
13
|
+
}),
|
14
|
+
getAbortEvent: (metadata, settings) => ({
|
15
|
+
type: "speech-synthesis-finished",
|
16
|
+
status: "abort",
|
17
|
+
settings,
|
18
|
+
metadata,
|
19
|
+
text,
|
20
|
+
}),
|
21
|
+
getFailureEvent: (metadata, settings, error) => ({
|
22
|
+
type: "speech-synthesis-finished",
|
23
|
+
status: "failure",
|
24
|
+
metadata,
|
25
|
+
settings,
|
26
|
+
text,
|
27
|
+
error,
|
28
|
+
}),
|
29
|
+
getSuccessEvent: (metadata, settings, response, output) => ({
|
30
|
+
type: "speech-synthesis-finished",
|
31
|
+
status: "success",
|
32
|
+
metadata,
|
33
|
+
settings,
|
34
|
+
text,
|
35
|
+
response,
|
36
|
+
speech: output,
|
37
|
+
}),
|
38
|
+
});
|
39
|
+
return options?.fullResponse === true
|
40
|
+
? {
|
41
|
+
speech: result.output,
|
42
|
+
metadata: result.metadata,
|
43
|
+
}
|
44
|
+
: result.output;
|
45
|
+
}
|
@@ -0,0 +1 @@
|
|
1
|
+
export {};
|
@@ -0,0 +1 @@
|
|
1
|
+
export {};
|
@@ -0,0 +1,31 @@
|
|
1
|
+
"use strict";
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
3
|
+
exports.failedElevenLabsCallResponseHandler = void 0;
|
4
|
+
const ApiCallError_js_1 = require("../../util/api/ApiCallError.cjs");
|
5
|
+
const failedElevenLabsCallResponseHandler = async ({ response, url, requestBodyValues }) => {
|
6
|
+
const responseBody = await response.text();
|
7
|
+
try {
|
8
|
+
// TODO implement ElevenLabsError
|
9
|
+
return new ApiCallError_js_1.ApiCallError({
|
10
|
+
message: responseBody,
|
11
|
+
statusCode: response.status,
|
12
|
+
url,
|
13
|
+
requestBodyValues,
|
14
|
+
});
|
15
|
+
}
|
16
|
+
catch (error) {
|
17
|
+
if (error instanceof Error) {
|
18
|
+
if (error.name === "AbortError" || error instanceof ApiCallError_js_1.ApiCallError) {
|
19
|
+
throw error;
|
20
|
+
}
|
21
|
+
}
|
22
|
+
throw new ApiCallError_js_1.ApiCallError({
|
23
|
+
message: responseBody,
|
24
|
+
cause: error,
|
25
|
+
statusCode: response.status,
|
26
|
+
url,
|
27
|
+
requestBodyValues,
|
28
|
+
});
|
29
|
+
}
|
30
|
+
};
|
31
|
+
exports.failedElevenLabsCallResponseHandler = failedElevenLabsCallResponseHandler;
|
@@ -0,0 +1,27 @@
|
|
1
|
+
import { ApiCallError } from "../../util/api/ApiCallError.js";
|
2
|
+
export const failedElevenLabsCallResponseHandler = async ({ response, url, requestBodyValues }) => {
|
3
|
+
const responseBody = await response.text();
|
4
|
+
try {
|
5
|
+
// TODO implement ElevenLabsError
|
6
|
+
return new ApiCallError({
|
7
|
+
message: responseBody,
|
8
|
+
statusCode: response.status,
|
9
|
+
url,
|
10
|
+
requestBodyValues,
|
11
|
+
});
|
12
|
+
}
|
13
|
+
catch (error) {
|
14
|
+
if (error instanceof Error) {
|
15
|
+
if (error.name === "AbortError" || error instanceof ApiCallError) {
|
16
|
+
throw error;
|
17
|
+
}
|
18
|
+
}
|
19
|
+
throw new ApiCallError({
|
20
|
+
message: responseBody,
|
21
|
+
cause: error,
|
22
|
+
statusCode: response.status,
|
23
|
+
url,
|
24
|
+
requestBodyValues,
|
25
|
+
});
|
26
|
+
}
|
27
|
+
};
|
@@ -0,0 +1,88 @@
|
|
1
|
+
"use strict";
|
2
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
3
|
+
exports.ElevenLabsSpeechSynthesisModel = void 0;
|
4
|
+
const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
|
5
|
+
const callWithRetryAndThrottle_js_1 = require("../../util/api/callWithRetryAndThrottle.cjs");
|
6
|
+
const postToApi_js_1 = require("../../util/api/postToApi.cjs");
|
7
|
+
const ElevenLabsError_js_1 = require("./ElevenLabsError.cjs");
|
8
|
+
class ElevenLabsSpeechSynthesisModel extends AbstractModel_js_1.AbstractModel {
|
9
|
+
constructor(settings) {
|
10
|
+
super({ settings });
|
11
|
+
Object.defineProperty(this, "provider", {
|
12
|
+
enumerable: true,
|
13
|
+
configurable: true,
|
14
|
+
writable: true,
|
15
|
+
value: "elevenlabs"
|
16
|
+
});
|
17
|
+
Object.defineProperty(this, "modelName", {
|
18
|
+
enumerable: true,
|
19
|
+
configurable: true,
|
20
|
+
writable: true,
|
21
|
+
value: null
|
22
|
+
});
|
23
|
+
}
|
24
|
+
get apiKey() {
|
25
|
+
const apiKey = this.settings.apiKey ?? process.env.ELEVENLABS_API_KEY;
|
26
|
+
if (apiKey == null) {
|
27
|
+
throw new Error("No ElevenLabs API key provided. Pass it in the constructor or set the ELEVENLABS_API_KEY environment variable.");
|
28
|
+
}
|
29
|
+
return apiKey;
|
30
|
+
}
|
31
|
+
async callAPI(text, options) {
|
32
|
+
const run = options?.run;
|
33
|
+
const settings = options?.settings;
|
34
|
+
const combinedSettings = {
|
35
|
+
...this.settings,
|
36
|
+
...settings,
|
37
|
+
};
|
38
|
+
return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
|
39
|
+
retry: this.settings.retry,
|
40
|
+
throttle: this.settings.throttle,
|
41
|
+
call: async () => callElevenLabsTextToSpeechAPI({
|
42
|
+
baseUrl: combinedSettings.baseUrl,
|
43
|
+
abortSignal: run?.abortSignal,
|
44
|
+
apiKey: this.apiKey,
|
45
|
+
text,
|
46
|
+
voiceId: combinedSettings.voice,
|
47
|
+
modelId: combinedSettings.model,
|
48
|
+
voiceSettings: combinedSettings.voiceSettings,
|
49
|
+
}),
|
50
|
+
});
|
51
|
+
}
|
52
|
+
generateSpeechResponse(text, options) {
|
53
|
+
return this.callAPI(text, options);
|
54
|
+
}
|
55
|
+
withSettings(additionalSettings) {
|
56
|
+
return new ElevenLabsSpeechSynthesisModel({
|
57
|
+
...this.settings,
|
58
|
+
...additionalSettings,
|
59
|
+
});
|
60
|
+
}
|
61
|
+
}
|
62
|
+
exports.ElevenLabsSpeechSynthesisModel = ElevenLabsSpeechSynthesisModel;
|
63
|
+
/**
|
64
|
+
* @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
|
65
|
+
*/
|
66
|
+
async function callElevenLabsTextToSpeechAPI({ baseUrl = "https://api.elevenlabs.io/v1", abortSignal, apiKey, text, voiceId, modelId, voiceSettings, }) {
|
67
|
+
return (0, postToApi_js_1.postJsonToApi)({
|
68
|
+
url: `${baseUrl}/text-to-speech/${voiceId}`,
|
69
|
+
headers: {
|
70
|
+
"xi-api-key": apiKey,
|
71
|
+
},
|
72
|
+
body: {
|
73
|
+
text,
|
74
|
+
model_id: modelId,
|
75
|
+
voice_settings: voiceSettings != null
|
76
|
+
? {
|
77
|
+
stability: voiceSettings.stability,
|
78
|
+
similarity_boost: voiceSettings.similarityBoost,
|
79
|
+
style: voiceSettings.style,
|
80
|
+
use_speaker_boost: voiceSettings.useSpeakerBoost,
|
81
|
+
}
|
82
|
+
: undefined,
|
83
|
+
},
|
84
|
+
failedResponseHandler: ElevenLabsError_js_1.failedElevenLabsCallResponseHandler,
|
85
|
+
successfulResponseHandler: (0, postToApi_js_1.createAudioMpegResponseHandler)(),
|
86
|
+
abortSignal,
|
87
|
+
});
|
88
|
+
}
|
@@ -0,0 +1,29 @@
|
|
1
|
+
/// <reference types="node" resolution-mode="require"/>
|
2
|
+
import { AbstractModel } from "../../model-function/AbstractModel.js";
|
3
|
+
import { FunctionOptions } from "../../model-function/FunctionOptions.js";
|
4
|
+
import { SpeechSynthesisModel, SpeechSynthesisModelSettings } from "../../model-function/synthesize-speech/SpeechSynthesisModel.js";
|
5
|
+
import { RetryFunction } from "../../util/api/RetryFunction.js";
|
6
|
+
import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
|
7
|
+
export interface ElevenLabsSpeechSynthesisModelSettings extends SpeechSynthesisModelSettings {
|
8
|
+
voice: string;
|
9
|
+
baseUrl?: string;
|
10
|
+
apiKey?: string;
|
11
|
+
model?: string;
|
12
|
+
voiceSettings?: {
|
13
|
+
stability: number;
|
14
|
+
similarityBoost: number;
|
15
|
+
style?: number;
|
16
|
+
useSpeakerBoost?: boolean;
|
17
|
+
};
|
18
|
+
retry?: RetryFunction;
|
19
|
+
throttle?: ThrottleFunction;
|
20
|
+
}
|
21
|
+
export declare class ElevenLabsSpeechSynthesisModel extends AbstractModel<ElevenLabsSpeechSynthesisModelSettings> implements SpeechSynthesisModel<ElevenLabsSpeechSynthesisModelSettings> {
|
22
|
+
constructor(settings: ElevenLabsSpeechSynthesisModelSettings);
|
23
|
+
readonly provider = "elevenlabs";
|
24
|
+
readonly modelName: null;
|
25
|
+
private get apiKey();
|
26
|
+
private callAPI;
|
27
|
+
generateSpeechResponse(text: string, options?: FunctionOptions<ElevenLabsSpeechSynthesisModelSettings> | undefined): Promise<Buffer>;
|
28
|
+
withSettings(additionalSettings: Partial<ElevenLabsSpeechSynthesisModelSettings>): this;
|
29
|
+
}
|
@@ -0,0 +1,84 @@
|
|
1
|
+
import { AbstractModel } from "../../model-function/AbstractModel.js";
|
2
|
+
import { callWithRetryAndThrottle } from "../../util/api/callWithRetryAndThrottle.js";
|
3
|
+
import { createAudioMpegResponseHandler, postJsonToApi, } from "../../util/api/postToApi.js";
|
4
|
+
import { failedElevenLabsCallResponseHandler } from "./ElevenLabsError.js";
|
5
|
+
export class ElevenLabsSpeechSynthesisModel extends AbstractModel {
|
6
|
+
constructor(settings) {
|
7
|
+
super({ settings });
|
8
|
+
Object.defineProperty(this, "provider", {
|
9
|
+
enumerable: true,
|
10
|
+
configurable: true,
|
11
|
+
writable: true,
|
12
|
+
value: "elevenlabs"
|
13
|
+
});
|
14
|
+
Object.defineProperty(this, "modelName", {
|
15
|
+
enumerable: true,
|
16
|
+
configurable: true,
|
17
|
+
writable: true,
|
18
|
+
value: null
|
19
|
+
});
|
20
|
+
}
|
21
|
+
get apiKey() {
|
22
|
+
const apiKey = this.settings.apiKey ?? process.env.ELEVENLABS_API_KEY;
|
23
|
+
if (apiKey == null) {
|
24
|
+
throw new Error("No ElevenLabs API key provided. Pass it in the constructor or set the ELEVENLABS_API_KEY environment variable.");
|
25
|
+
}
|
26
|
+
return apiKey;
|
27
|
+
}
|
28
|
+
async callAPI(text, options) {
|
29
|
+
const run = options?.run;
|
30
|
+
const settings = options?.settings;
|
31
|
+
const combinedSettings = {
|
32
|
+
...this.settings,
|
33
|
+
...settings,
|
34
|
+
};
|
35
|
+
return callWithRetryAndThrottle({
|
36
|
+
retry: this.settings.retry,
|
37
|
+
throttle: this.settings.throttle,
|
38
|
+
call: async () => callElevenLabsTextToSpeechAPI({
|
39
|
+
baseUrl: combinedSettings.baseUrl,
|
40
|
+
abortSignal: run?.abortSignal,
|
41
|
+
apiKey: this.apiKey,
|
42
|
+
text,
|
43
|
+
voiceId: combinedSettings.voice,
|
44
|
+
modelId: combinedSettings.model,
|
45
|
+
voiceSettings: combinedSettings.voiceSettings,
|
46
|
+
}),
|
47
|
+
});
|
48
|
+
}
|
49
|
+
generateSpeechResponse(text, options) {
|
50
|
+
return this.callAPI(text, options);
|
51
|
+
}
|
52
|
+
withSettings(additionalSettings) {
|
53
|
+
return new ElevenLabsSpeechSynthesisModel({
|
54
|
+
...this.settings,
|
55
|
+
...additionalSettings,
|
56
|
+
});
|
57
|
+
}
|
58
|
+
}
|
59
|
+
/**
|
60
|
+
* @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
|
61
|
+
*/
|
62
|
+
async function callElevenLabsTextToSpeechAPI({ baseUrl = "https://api.elevenlabs.io/v1", abortSignal, apiKey, text, voiceId, modelId, voiceSettings, }) {
|
63
|
+
return postJsonToApi({
|
64
|
+
url: `${baseUrl}/text-to-speech/${voiceId}`,
|
65
|
+
headers: {
|
66
|
+
"xi-api-key": apiKey,
|
67
|
+
},
|
68
|
+
body: {
|
69
|
+
text,
|
70
|
+
model_id: modelId,
|
71
|
+
voice_settings: voiceSettings != null
|
72
|
+
? {
|
73
|
+
stability: voiceSettings.stability,
|
74
|
+
similarity_boost: voiceSettings.similarityBoost,
|
75
|
+
style: voiceSettings.style,
|
76
|
+
use_speaker_boost: voiceSettings.useSpeakerBoost,
|
77
|
+
}
|
78
|
+
: undefined,
|
79
|
+
},
|
80
|
+
failedResponseHandler: failedElevenLabsCallResponseHandler,
|
81
|
+
successfulResponseHandler: createAudioMpegResponseHandler(),
|
82
|
+
abortSignal,
|
83
|
+
});
|
84
|
+
}
|
@@ -0,0 +1,17 @@
|
|
1
|
+
"use strict";
|
2
|
+
var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
|
3
|
+
if (k2 === undefined) k2 = k;
|
4
|
+
var desc = Object.getOwnPropertyDescriptor(m, k);
|
5
|
+
if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
|
6
|
+
desc = { enumerable: true, get: function() { return m[k]; } };
|
7
|
+
}
|
8
|
+
Object.defineProperty(o, k2, desc);
|
9
|
+
}) : (function(o, m, k, k2) {
|
10
|
+
if (k2 === undefined) k2 = k;
|
11
|
+
o[k2] = m[k];
|
12
|
+
}));
|
13
|
+
var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
14
|
+
for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
|
15
|
+
};
|
16
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
17
|
+
__exportStar(require("./ElevenLabsSpeechSynthesisModel.cjs"), exports);
|
@@ -0,0 +1 @@
|
|
1
|
+
export * from "./ElevenLabsSpeechSynthesisModel.js";
|
@@ -0,0 +1 @@
|
|
1
|
+
export * from "./ElevenLabsSpeechSynthesisModel.js";
|
package/model-provider/index.cjs
CHANGED
@@ -16,6 +16,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
|
|
16
16
|
Object.defineProperty(exports, "__esModule", { value: true });
|
17
17
|
__exportStar(require("./automatic1111/index.cjs"), exports);
|
18
18
|
__exportStar(require("./cohere/index.cjs"), exports);
|
19
|
+
__exportStar(require("./elevenlabs/index.cjs"), exports);
|
19
20
|
__exportStar(require("./huggingface/index.cjs"), exports);
|
20
21
|
__exportStar(require("./llamacpp/index.cjs"), exports);
|
21
22
|
__exportStar(require("./openai/index.cjs"), exports);
|
package/model-provider/index.js
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
import z from "zod";
|
3
3
|
import { AbstractModel } from "../../model-function/AbstractModel.js";
|
4
4
|
import { FunctionOptions } from "../../model-function/FunctionOptions.js";
|
5
|
-
import { TranscriptionModel, TranscriptionModelSettings } from "../../model-function/transcribe-
|
5
|
+
import { TranscriptionModel, TranscriptionModelSettings } from "../../model-function/transcribe-speech/TranscriptionModel.js";
|
6
6
|
import { RetryFunction } from "../../util/api/RetryFunction.js";
|
7
7
|
import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
|
8
8
|
import { ResponseHandler } from "../../util/api/postToApi.js";
|
package/package.json
CHANGED
package/util/api/postToApi.cjs
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
"use strict";
|
2
2
|
Object.defineProperty(exports, "__esModule", { value: true });
|
3
|
-
exports.postToApi = exports.postJsonToApi = exports.createTextResponseHandler = exports.createJsonResponseHandler = void 0;
|
3
|
+
exports.postToApi = exports.postJsonToApi = exports.createAudioMpegResponseHandler = exports.createTextResponseHandler = exports.createJsonResponseHandler = void 0;
|
4
4
|
const ApiCallError_js_1 = require("./ApiCallError.cjs");
|
5
5
|
const createJsonResponseHandler = (responseSchema) => async ({ response, url, requestBodyValues }) => {
|
6
6
|
const parsedResult = responseSchema.safeParse(await response.json());
|
@@ -18,6 +18,19 @@ const createJsonResponseHandler = (responseSchema) => async ({ response, url, re
|
|
18
18
|
exports.createJsonResponseHandler = createJsonResponseHandler;
|
19
19
|
const createTextResponseHandler = () => async ({ response }) => response.text();
|
20
20
|
exports.createTextResponseHandler = createTextResponseHandler;
|
21
|
+
const createAudioMpegResponseHandler = () => async ({ response, url, requestBodyValues }) => {
|
22
|
+
if (response.headers.get("Content-Type") !== "audio/mpeg") {
|
23
|
+
throw new ApiCallError_js_1.ApiCallError({
|
24
|
+
message: "Invalid Content-Type (must be audio/mpeg)",
|
25
|
+
statusCode: response.status,
|
26
|
+
url,
|
27
|
+
requestBodyValues,
|
28
|
+
});
|
29
|
+
}
|
30
|
+
const arrayBuffer = await response.arrayBuffer();
|
31
|
+
return Buffer.from(arrayBuffer);
|
32
|
+
};
|
33
|
+
exports.createAudioMpegResponseHandler = createAudioMpegResponseHandler;
|
21
34
|
const postJsonToApi = async ({ url, headers, body, failedResponseHandler, successfulResponseHandler, abortSignal, }) => (0, exports.postToApi)({
|
22
35
|
url,
|
23
36
|
headers: {
|
package/util/api/postToApi.d.ts
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
/// <reference types="node" resolution-mode="require"/>
|
1
2
|
import { z } from "zod";
|
2
3
|
import { ApiCallError } from "./ApiCallError.js";
|
3
4
|
export type ResponseHandler<T> = (options: {
|
@@ -7,6 +8,7 @@ export type ResponseHandler<T> = (options: {
|
|
7
8
|
}) => PromiseLike<T>;
|
8
9
|
/**
 * Creates a response handler that reads the response body as JSON, checks it
 * against `responseSchema` (zod `safeParse`) and resolves with the parsed data.
 */
export declare const createJsonResponseHandler: <T>(responseSchema: z.ZodType<T, z.ZodTypeDef, T>) => ResponseHandler<T>;
|
9
10
|
/** Creates a response handler that resolves with the response body read as text. */
export declare const createTextResponseHandler: () => ResponseHandler<string>;
|
11
|
+
/**
 * Creates a response handler for audio responses: it throws an ApiCallError
 * unless the response's Content-Type is audio/mpeg, and otherwise resolves
 * with the response body as a Node.js Buffer.
 */
export declare const createAudioMpegResponseHandler: () => ResponseHandler<Buffer>;
|
10
12
|
export declare const postJsonToApi: <T>({ url, headers, body, failedResponseHandler, successfulResponseHandler, abortSignal, }: {
|
11
13
|
url: string;
|
12
14
|
headers?: Record<string, string> | undefined;
|
package/util/api/postToApi.js
CHANGED
@@ -13,6 +13,18 @@ export const createJsonResponseHandler = (responseSchema) => async ({ response,
|
|
13
13
|
return parsedResult.data;
|
14
14
|
};
|
15
15
|
export const createTextResponseHandler = () => async ({ response }) => response.text();
|
16
|
+
export const createAudioMpegResponseHandler = () => async ({ response, url, requestBodyValues }) => {
|
17
|
+
if (response.headers.get("Content-Type") !== "audio/mpeg") {
|
18
|
+
throw new ApiCallError({
|
19
|
+
message: "Invalid Content-Type (must be audio/mpeg)",
|
20
|
+
statusCode: response.status,
|
21
|
+
url,
|
22
|
+
requestBodyValues,
|
23
|
+
});
|
24
|
+
}
|
25
|
+
const arrayBuffer = await response.arrayBuffer();
|
26
|
+
return Buffer.from(arrayBuffer);
|
27
|
+
};
|
16
28
|
export const postJsonToApi = async ({ url, headers, body, failedResponseHandler, successfulResponseHandler, abortSignal, }) => postToApi({
|
17
29
|
url,
|
18
30
|
headers: {
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|