modelfusion 0.16.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/README.md +43 -16
  2. package/model-function/ModelCallEvent.d.ts +4 -3
  3. package/model-function/SuccessfulModelCall.cjs +1 -0
  4. package/model-function/SuccessfulModelCall.d.ts +1 -1
  5. package/model-function/SuccessfulModelCall.js +1 -0
  6. package/model-function/index.cjs +6 -3
  7. package/model-function/index.d.ts +6 -3
  8. package/model-function/index.js +6 -3
  9. package/model-function/synthesize-speech/SpeechSynthesisEvent.d.ts +22 -0
  10. package/model-function/synthesize-speech/SpeechSynthesisModel.d.ts +11 -0
  11. package/model-function/synthesize-speech/synthesizeSpeech.cjs +49 -0
  12. package/model-function/synthesize-speech/synthesizeSpeech.d.ts +16 -0
  13. package/model-function/synthesize-speech/synthesizeSpeech.js +45 -0
  14. package/model-function/transcribe-speech/TranscriptionEvent.cjs +2 -0
  15. package/model-function/transcribe-speech/TranscriptionEvent.js +1 -0
  16. package/model-function/transcribe-speech/TranscriptionModel.cjs +2 -0
  17. package/model-function/transcribe-speech/TranscriptionModel.js +1 -0
  18. package/model-provider/elevenlabs/ElevenLabsError.cjs +31 -0
  19. package/model-provider/elevenlabs/ElevenLabsError.d.ts +3 -0
  20. package/model-provider/elevenlabs/ElevenLabsError.js +27 -0
  21. package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.cjs +88 -0
  22. package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.d.ts +29 -0
  23. package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.js +84 -0
  24. package/model-provider/elevenlabs/index.cjs +17 -0
  25. package/model-provider/elevenlabs/index.d.ts +1 -0
  26. package/model-provider/elevenlabs/index.js +1 -0
  27. package/model-provider/index.cjs +1 -0
  28. package/model-provider/index.d.ts +1 -0
  29. package/model-provider/index.js +1 -0
  30. package/model-provider/openai/OpenAITranscriptionModel.d.ts +1 -1
  31. package/package.json +1 -1
  32. package/util/api/postToApi.cjs +14 -1
  33. package/util/api/postToApi.d.ts +2 -0
  34. package/util/api/postToApi.js +12 -0
  35. /package/model-function/{transcribe-audio/TranscriptionEvent.cjs → synthesize-speech/SpeechSynthesisEvent.cjs} +0 -0
  36. /package/model-function/{transcribe-audio/TranscriptionEvent.js → synthesize-speech/SpeechSynthesisEvent.js} +0 -0
  37. /package/model-function/{transcribe-audio/TranscriptionModel.cjs → synthesize-speech/SpeechSynthesisModel.cjs} +0 -0
  38. /package/model-function/{transcribe-audio/TranscriptionModel.js → synthesize-speech/SpeechSynthesisModel.js} +0 -0
  39. /package/model-function/{transcribe-audio → transcribe-speech}/TranscriptionEvent.d.ts +0 -0
  40. /package/model-function/{transcribe-audio → transcribe-speech}/TranscriptionModel.d.ts +0 -0
  41. /package/model-function/{transcribe-audio → transcribe-speech}/transcribe.cjs +0 -0
  42. /package/model-function/{transcribe-audio → transcribe-speech}/transcribe.d.ts +0 -0
  43. /package/model-function/{transcribe-audio → transcribe-speech}/transcribe.js +0 -0
package/README.md CHANGED
@@ -20,7 +20,7 @@ ModelFusion is a library for building AI apps, chatbots, and agents. It provides
  - **Type inference and validation**: ModelFusion uses TypeScript and [Zod](https://github.com/colinhacks/zod) to infer types wherever possible and to validate model responses.
  - **Flexibility and control**: AI application development can be complex and unique to each project. With ModelFusion, you have complete control over the prompts and model settings, and you can access the raw responses from the models quickly to build what you need.
  - **No chains and predefined prompts**: Use the concepts provided by JavaScript (variables, functions, etc.) and explicit prompts to build applications you can easily understand and control. Not black magic.
- - **More than LLMs**: ModelFusion supports other models, e.g., text-to-image and voice-to-text, to help you build rich AI applications that go beyond just text.
+ - **Multimodal Support**: Beyond just LLMs, ModelFusion encompasses a diverse array of models including text generation, text-to-speech, speech-to-text, and image generation, allowing you to build multifaceted AI applications with ease.
  - **Integrated support features**: Essential features like logging, retries, throttling, tracing, and error handling are built-in, helping you focus more on building your application.
 
  ## Quick Install
@@ -260,9 +260,9 @@ const { tool, parameters, result, text } = await useToolOrGenerateText(
  );
  ```
 
- ### [Transcribe Audio](https://modelfusion.dev/guide/function/transcribe-audio)
+ ### [Transcribe Speech](https://modelfusion.dev/guide/function/transcribe-speech)
 
- Turn audio (voice) into text.
+ Turn speech (audio) into text.
 
  ```ts
  const transcription = await transcribe(
@@ -274,6 +274,20 @@ const transcription = await transcribe(
  );
  ```
 
+ ### [Synthesize Speech](https://modelfusion.dev/guide/function/synthesize-speech)
+
+ Turn text into speech (audio).
+
+ ```ts
+ // `speech` is a Buffer with MP3 audio data
+ const speech = await synthesizeSpeech(
+   new ElevenLabsSpeechSynthesisModel({
+     voice: "ErXwobaYiN019PkySvjV",
+   }),
+   "Hello, World!"
+ );
+ ```
+
  ### [Generate Image](https://modelfusion.dev/guide/function/generate-image)
 
  Generate a base64-encoded image from a prompt.
@@ -356,7 +370,8 @@ const { chunks } = await retrieveTextChunks(
  - [Generate JSON or text](https://modelfusion.dev/guide/function/generate-json-or-text)
  - [Embed Text](https://modelfusion.dev/guide/function/embed-text)
  - [Tokenize Text](https://modelfusion.dev/guide/function/tokenize-text)
- - [Transcribe Audio](https://modelfusion.dev/guide/function/transcribe-audio)
+ - [Transcribe Speech](https://modelfusion.dev/guide/function/transcribe-speech)
+ - [Synthesize Speech](https://modelfusion.dev/guide/function/synthesize-speech)
  - [Generate images](https://modelfusion.dev/guide/function/generate-image)
  - Summarize text
  - [Tools](https://modelfusion.dev/guide/tools)
@@ -375,18 +390,30 @@ const { chunks } = await retrieveTextChunks(
 
  ### Model Providers
 
- | | [OpenAI](https://modelfusion.dev/integration/model-provider/openai) | [Cohere](https://modelfusion.dev/integration/model-provider/cohere) | [Llama.cpp](https://modelfusion.dev/integration/model-provider/llamacpp) | [Hugging Face](https://modelfusion.dev/integration/model-provider/huggingface) | [Stability AI](https://modelfusion.dev/integration/model-provider/stability) | [Automatic1111](https://modelfusion.dev/integration/model-provider/automatic1111) |
- | --- | --- | --- | --- | --- | --- | --- |
- | Hosting | cloud | cloud | server (local) | cloud | cloud | server (local) |
- | [Generate text](https://modelfusion.dev/guide/function/generate-text) | ✅ | ✅ | ✅ | ✅ | | |
- | [Stream text](https://modelfusion.dev/guide/function/generate-text) | ✅ | ✅ | ✅ | | | |
- | [Generate JSON](https://modelfusion.dev/guide/function/generate-json) | chat models | | | | | |
- | [Generate JSON or Text](https://modelfusion.dev/guide/function/generate-json-or-text) | chat models | | | | | |
- | [Embed text](https://modelfusion.dev/guide/function/embed-text) | ✅ | ✅ | ✅ | | | |
- | [Tokenize text](https://modelfusion.dev/guide/function/tokenize-text) | full | full | basic | | | |
- | [Generate image](https://modelfusion.dev/guide/function/generate-image) | ✅ | | | | ✅ | ✅ |
- | [Transcribe audio](https://modelfusion.dev/guide/function/transcribe-audio) | ✅ | | | | | |
- | [Cost calculation](https://modelfusion.dev/guide/run/cost-calculation) | ✅ | | | | | |
+ #### Text and JSON Generation
+
+ | | [OpenAI](https://modelfusion.dev/integration/model-provider/openai) | [Cohere](https://modelfusion.dev/integration/model-provider/cohere) | [Llama.cpp](https://modelfusion.dev/integration/model-provider/llamacpp) | [Hugging Face](https://modelfusion.dev/integration/model-provider/huggingface) |
+ | --- | --- | --- | --- | --- |
+ | [Generate text](https://modelfusion.dev/guide/function/generate-text) | ✅ | ✅ | ✅ | ✅ |
+ | [Stream text](https://modelfusion.dev/guide/function/generate-text) | ✅ | ✅ | ✅ | |
+ | [Generate JSON](https://modelfusion.dev/guide/function/generate-json) | chat models | | | |
+ | [Generate JSON or Text](https://modelfusion.dev/guide/function/generate-json-or-text) | chat models | | | |
+ | [Embed text](https://modelfusion.dev/guide/function/embed-text) | ✅ | ✅ | ✅ | |
+ | [Tokenize text](https://modelfusion.dev/guide/function/tokenize-text) | full | full | basic | |
+
+ #### Image Generation
+
+ - [OpenAI (Dall·E)](https://modelfusion.dev/integration/model-provider/openai)
+ - [Stability AI](https://modelfusion.dev/integration/model-provider/stability)
+ - [Automatic1111](https://modelfusion.dev/integration/model-provider/automatic1111)
+
+ #### Speech Transcription
+
+ - [OpenAI (Whisper)](https://modelfusion.dev/integration/model-provider/openai)
+
+ #### Speech Synthesis
+
+ - [Eleven Labs](https://modelfusion.dev/integration/model-provider/elevenlabs)
 
  ### Vector Indices
 
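Taken together with the existing transcription example, the README additions support a full audio round trip. A minimal sketch, assuming the `{ type: "mp3", data }` input shape from the (truncated) `transcribe` snippet above and that `OPENAI_API_KEY` / `ELEVENLABS_API_KEY` are set in the environment:

```ts
import fs from "node:fs/promises";
import {
  ElevenLabsSpeechSynthesisModel,
  OpenAITranscriptionModel,
  synthesizeSpeech,
  transcribe,
} from "modelfusion";

// Synthesize speech, save it, and transcribe it back.
const speech = await synthesizeSpeech(
  new ElevenLabsSpeechSynthesisModel({ voice: "ErXwobaYiN019PkySvjV" }),
  "Hello, World!"
);
await fs.writeFile("hello.mp3", speech); // `speech` is an MP3 Buffer

const transcription = await transcribe(
  new OpenAITranscriptionModel({ model: "whisper-1" }),
  { type: "mp3", data: speech }
);
console.log(transcription);
```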
package/model-function/ModelCallEvent.d.ts CHANGED
@@ -5,12 +5,13 @@ import { ImageGenerationFinishedEvent, ImageGenerationStartedEvent } from "./gen
  import { JsonGenerationFinishedEvent, JsonGenerationStartedEvent } from "./generate-json/JsonGenerationEvent.js";
  import { TextGenerationFinishedEvent, TextGenerationStartedEvent } from "./generate-text/TextGenerationEvent.js";
  import { TextStreamingFinishedEvent, TextStreamingStartedEvent } from "./generate-text/TextStreamingEvent.js";
- import { TranscriptionFinishedEvent, TranscriptionStartedEvent } from "./transcribe-audio/TranscriptionEvent.js";
+ import { SpeechSynthesisFinishedEvent, SpeechSynthesisStartedEvent } from "./synthesize-speech/SpeechSynthesisEvent.js";
+ import { TranscriptionFinishedEvent, TranscriptionStartedEvent } from "./transcribe-speech/TranscriptionEvent.js";
  export type ModelCallStartedEventMetadata = RunFunctionStartedEventMetadata & {
      model: ModelInformation;
  };
- export type ModelCallStartedEvent = ImageGenerationStartedEvent | JsonGenerationStartedEvent | TextEmbeddingStartedEvent | TextGenerationStartedEvent | TextStreamingStartedEvent | TranscriptionStartedEvent;
+ export type ModelCallStartedEvent = ImageGenerationStartedEvent | JsonGenerationStartedEvent | SpeechSynthesisStartedEvent | TextEmbeddingStartedEvent | TextGenerationStartedEvent | TextStreamingStartedEvent | TranscriptionStartedEvent;
  export type ModelCallFinishedEventMetadata = RunFunctionFinishedEventMetadata & {
      model: ModelInformation;
  };
- export type ModelCallFinishedEvent = ImageGenerationFinishedEvent | JsonGenerationFinishedEvent | TextEmbeddingFinishedEvent | TextGenerationFinishedEvent | TextStreamingFinishedEvent | TranscriptionFinishedEvent;
+ export type ModelCallFinishedEvent = ImageGenerationFinishedEvent | JsonGenerationFinishedEvent | SpeechSynthesisFinishedEvent | TextEmbeddingFinishedEvent | TextGenerationFinishedEvent | TextStreamingFinishedEvent | TranscriptionFinishedEvent;
package/model-function/SuccessfulModelCall.cjs CHANGED
@@ -18,6 +18,7 @@ const eventTypeToCostType = {
      "image-generation-finished": "image-generation",
      "json-generation-finished": "json-generation",
      "json-or-text-generation-finished": "json-or-text-generation",
+     "speech-synthesis-finished": "speech-synthesis",
      "text-embedding-finished": "text-embedding",
      "text-generation-finished": "text-generation",
      "text-streaming-finished": "text-streaming",
package/model-function/SuccessfulModelCall.d.ts CHANGED
@@ -1,7 +1,7 @@
  import { RunFunctionEvent } from "../run/RunFunctionEvent.js";
  import { ModelInformation } from "./ModelInformation.js";
  export type SuccessfulModelCall = {
-     type: "image-generation" | "json-generation" | "json-or-text-generation" | "text-embedding" | "text-generation" | "text-streaming" | "transcription";
+     type: "image-generation" | "json-generation" | "json-or-text-generation" | "speech-synthesis" | "text-embedding" | "text-generation" | "text-streaming" | "transcription";
      model: ModelInformation;
      settings: unknown;
      response: unknown;
package/model-function/SuccessfulModelCall.js CHANGED
@@ -14,6 +14,7 @@ const eventTypeToCostType = {
      "image-generation-finished": "image-generation",
      "json-generation-finished": "json-generation",
      "json-or-text-generation-finished": "json-or-text-generation",
+     "speech-synthesis-finished": "speech-synthesis",
      "text-embedding-finished": "text-embedding",
      "text-generation-finished": "text-generation",
      "text-streaming-finished": "text-streaming",
package/model-function/index.cjs CHANGED
@@ -41,8 +41,11 @@ __exportStar(require("./generate-text/TextGenerationEvent.cjs"), exports);
  __exportStar(require("./generate-text/TextGenerationModel.cjs"), exports);
  __exportStar(require("./generate-text/generateText.cjs"), exports);
  __exportStar(require("./generate-text/streamText.cjs"), exports);
+ __exportStar(require("./synthesize-speech/SpeechSynthesisEvent.cjs"), exports);
+ __exportStar(require("./synthesize-speech/SpeechSynthesisModel.cjs"), exports);
+ __exportStar(require("./synthesize-speech/synthesizeSpeech.cjs"), exports);
  __exportStar(require("./tokenize-text/Tokenizer.cjs"), exports);
  __exportStar(require("./tokenize-text/countTokens.cjs"), exports);
- __exportStar(require("./transcribe-audio/TranscriptionEvent.cjs"), exports);
- __exportStar(require("./transcribe-audio/TranscriptionModel.cjs"), exports);
- __exportStar(require("./transcribe-audio/transcribe.cjs"), exports);
+ __exportStar(require("./transcribe-speech/TranscriptionEvent.cjs"), exports);
+ __exportStar(require("./transcribe-speech/TranscriptionModel.cjs"), exports);
+ __exportStar(require("./transcribe-speech/transcribe.cjs"), exports);
package/model-function/index.d.ts CHANGED
@@ -25,8 +25,11 @@ export * from "./generate-text/TextGenerationEvent.js";
  export * from "./generate-text/TextGenerationModel.js";
  export * from "./generate-text/generateText.js";
  export * from "./generate-text/streamText.js";
+ export * from "./synthesize-speech/SpeechSynthesisEvent.js";
+ export * from "./synthesize-speech/SpeechSynthesisModel.js";
+ export * from "./synthesize-speech/synthesizeSpeech.js";
  export * from "./tokenize-text/Tokenizer.js";
  export * from "./tokenize-text/countTokens.js";
- export * from "./transcribe-audio/TranscriptionEvent.js";
- export * from "./transcribe-audio/TranscriptionModel.js";
- export * from "./transcribe-audio/transcribe.js";
+ export * from "./transcribe-speech/TranscriptionEvent.js";
+ export * from "./transcribe-speech/TranscriptionModel.js";
+ export * from "./transcribe-speech/transcribe.js";
package/model-function/index.js CHANGED
@@ -25,8 +25,11 @@ export * from "./generate-text/TextGenerationEvent.js";
  export * from "./generate-text/TextGenerationModel.js";
  export * from "./generate-text/generateText.js";
  export * from "./generate-text/streamText.js";
+ export * from "./synthesize-speech/SpeechSynthesisEvent.js";
+ export * from "./synthesize-speech/SpeechSynthesisModel.js";
+ export * from "./synthesize-speech/synthesizeSpeech.js";
  export * from "./tokenize-text/Tokenizer.js";
  export * from "./tokenize-text/countTokens.js";
- export * from "./transcribe-audio/TranscriptionEvent.js";
- export * from "./transcribe-audio/TranscriptionModel.js";
- export * from "./transcribe-audio/transcribe.js";
+ export * from "./transcribe-speech/TranscriptionEvent.js";
+ export * from "./transcribe-speech/TranscriptionModel.js";
+ export * from "./transcribe-speech/transcribe.js";
package/model-function/synthesize-speech/SpeechSynthesisEvent.d.ts ADDED
@@ -0,0 +1,22 @@
+ /// <reference types="node" resolution-mode="require"/>
+ import { ModelCallFinishedEventMetadata, ModelCallStartedEventMetadata } from "../ModelCallEvent.js";
+ export type SpeechSynthesisStartedEvent = {
+     type: "speech-synthesis-started";
+     metadata: ModelCallStartedEventMetadata;
+     settings: unknown;
+     text: string;
+ };
+ export type SpeechSynthesisFinishedEvent = {
+     type: "speech-synthesis-finished";
+     metadata: ModelCallFinishedEventMetadata;
+     settings: unknown;
+     text: string;
+ } & ({
+     status: "success";
+     response: Buffer;
+ } | {
+     status: "failure";
+     error: unknown;
+ } | {
+     status: "abort";
+ });
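`SpeechSynthesisFinishedEvent` is a discriminated union on `status`, so observer code can narrow it with a `switch`. A minimal sketch (the observer wiring itself is outside this diff):

```ts
import { SpeechSynthesisFinishedEvent } from "modelfusion";

function logSpeechSynthesisResult(event: SpeechSynthesisFinishedEvent) {
  switch (event.status) {
    case "success":
      // `response` (the audio Buffer) is only available on the "success" branch.
      console.log(`synthesized ${event.response.length} bytes for "${event.text}"`);
      break;
    case "failure":
      console.error(`synthesis failed for "${event.text}":`, event.error);
      break;
    case "abort":
      console.warn(`synthesis aborted for "${event.text}"`);
      break;
  }
}
```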
package/model-function/synthesize-speech/SpeechSynthesisModel.d.ts ADDED
@@ -0,0 +1,11 @@
+ /// <reference types="node" resolution-mode="require"/>
+ import { FunctionOptions } from "../FunctionOptions.js";
+ import { Model, ModelSettings } from "../Model.js";
+ export interface SpeechSynthesisModelSettings extends ModelSettings {
+ }
+ export interface SpeechSynthesisModel<SETTINGS> extends Model<SETTINGS> {
+     /**
+      * Generates an mp3 audio buffer that contains the speech for the given text.
+      */
+     generateSpeechResponse: (text: string, options?: FunctionOptions<SETTINGS>) => PromiseLike<Buffer>;
+ }
package/model-function/synthesize-speech/synthesizeSpeech.cjs ADDED
@@ -0,0 +1,49 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.synthesizeSpeech = void 0;
+ const executeCall_js_1 = require("../executeCall.cjs");
+ async function synthesizeSpeech(model, text, options) {
+     const result = await (0, executeCall_js_1.executeCall)({
+         model,
+         options,
+         generateResponse: (options) => model.generateSpeechResponse(text, options),
+         extractOutputValue: (buffer) => buffer,
+         getStartEvent: (metadata, settings) => ({
+             type: "speech-synthesis-started",
+             metadata,
+             settings,
+             text,
+         }),
+         getAbortEvent: (metadata, settings) => ({
+             type: "speech-synthesis-finished",
+             status: "abort",
+             settings,
+             metadata,
+             text,
+         }),
+         getFailureEvent: (metadata, settings, error) => ({
+             type: "speech-synthesis-finished",
+             status: "failure",
+             metadata,
+             settings,
+             text,
+             error,
+         }),
+         getSuccessEvent: (metadata, settings, response, output) => ({
+             type: "speech-synthesis-finished",
+             status: "success",
+             metadata,
+             settings,
+             text,
+             response,
+             speech: output,
+         }),
+     });
+     return options?.fullResponse === true
+         ? {
+             speech: result.output,
+             metadata: result.metadata,
+         }
+         : result.output;
+ }
+ exports.synthesizeSpeech = synthesizeSpeech;
package/model-function/synthesize-speech/synthesizeSpeech.d.ts ADDED
@@ -0,0 +1,16 @@
+ /// <reference types="node" resolution-mode="require"/>
+ import { FunctionOptions } from "../FunctionOptions.js";
+ import { CallMetadata } from "../executeCall.js";
+ import { SpeechSynthesisModel, SpeechSynthesisModelSettings } from "./SpeechSynthesisModel.js";
+ /**
+  * Synthesizes speech from text.
+  */
+ export declare function synthesizeSpeech<SETTINGS extends SpeechSynthesisModelSettings>(model: SpeechSynthesisModel<SETTINGS>, text: string, options: FunctionOptions<SETTINGS> & {
+     fullResponse: true;
+ }): Promise<{
+     speech: Buffer;
+     metadata: CallMetadata<SpeechSynthesisModel<SETTINGS>>;
+ }>;
+ export declare function synthesizeSpeech<SETTINGS extends SpeechSynthesisModelSettings>(model: SpeechSynthesisModel<SETTINGS>, text: string, options?: FunctionOptions<SETTINGS> & {
+     fullResponse?: false;
+ }): Promise<Buffer>;
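The two overloads tie the return type to the options: by default the promise resolves to the MP3 `Buffer`, while `fullResponse: true` also surfaces the call metadata. A usage sketch based on these declarations:

```ts
import { ElevenLabsSpeechSynthesisModel, synthesizeSpeech } from "modelfusion";

const model = new ElevenLabsSpeechSynthesisModel({ voice: "ErXwobaYiN019PkySvjV" });

// Overload 2: resolves to the MP3 Buffer directly.
const speech = await synthesizeSpeech(model, "Hello, World!");

// Overload 1: resolves to { speech, metadata } for logging or cost tracking.
const { speech: audio, metadata } = await synthesizeSpeech(model, "Hello, World!", {
  fullResponse: true,
});
```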
package/model-function/synthesize-speech/synthesizeSpeech.js ADDED
@@ -0,0 +1,45 @@
+ import { executeCall } from "../executeCall.js";
+ export async function synthesizeSpeech(model, text, options) {
+     const result = await executeCall({
+         model,
+         options,
+         generateResponse: (options) => model.generateSpeechResponse(text, options),
+         extractOutputValue: (buffer) => buffer,
+         getStartEvent: (metadata, settings) => ({
+             type: "speech-synthesis-started",
+             metadata,
+             settings,
+             text,
+         }),
+         getAbortEvent: (metadata, settings) => ({
+             type: "speech-synthesis-finished",
+             status: "abort",
+             settings,
+             metadata,
+             text,
+         }),
+         getFailureEvent: (metadata, settings, error) => ({
+             type: "speech-synthesis-finished",
+             status: "failure",
+             metadata,
+             settings,
+             text,
+             error,
+         }),
+         getSuccessEvent: (metadata, settings, response, output) => ({
+             type: "speech-synthesis-finished",
+             status: "success",
+             metadata,
+             settings,
+             text,
+             response,
+             speech: output,
+         }),
+     });
+     return options?.fullResponse === true
+         ? {
+             speech: result.output,
+             metadata: result.metadata,
+         }
+         : result.output;
+ }
package/model-function/transcribe-speech/TranscriptionEvent.cjs ADDED
@@ -0,0 +1,2 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
package/model-function/transcribe-speech/TranscriptionModel.cjs ADDED
@@ -0,0 +1,2 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
package/model-provider/elevenlabs/ElevenLabsError.cjs ADDED
@@ -0,0 +1,31 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.failedElevenLabsCallResponseHandler = void 0;
+ const ApiCallError_js_1 = require("../../util/api/ApiCallError.cjs");
+ const failedElevenLabsCallResponseHandler = async ({ response, url, requestBodyValues }) => {
+     const responseBody = await response.text();
+     try {
+         // TODO implement ElevenLabsError
+         return new ApiCallError_js_1.ApiCallError({
+             message: responseBody,
+             statusCode: response.status,
+             url,
+             requestBodyValues,
+         });
+     }
+     catch (error) {
+         if (error instanceof Error) {
+             if (error.name === "AbortError" || error instanceof ApiCallError_js_1.ApiCallError) {
+                 throw error;
+             }
+         }
+         throw new ApiCallError_js_1.ApiCallError({
+             message: responseBody,
+             cause: error,
+             statusCode: response.status,
+             url,
+             requestBodyValues,
+         });
+     }
+ };
+ exports.failedElevenLabsCallResponseHandler = failedElevenLabsCallResponseHandler;
package/model-provider/elevenlabs/ElevenLabsError.d.ts ADDED
@@ -0,0 +1,3 @@
+ import { ApiCallError } from "../../util/api/ApiCallError.js";
+ import { ResponseHandler } from "../../util/api/postToApi.js";
+ export declare const failedElevenLabsCallResponseHandler: ResponseHandler<ApiCallError>;
package/model-provider/elevenlabs/ElevenLabsError.js ADDED
@@ -0,0 +1,27 @@
+ import { ApiCallError } from "../../util/api/ApiCallError.js";
+ export const failedElevenLabsCallResponseHandler = async ({ response, url, requestBodyValues }) => {
+     const responseBody = await response.text();
+     try {
+         // TODO implement ElevenLabsError
+         return new ApiCallError({
+             message: responseBody,
+             statusCode: response.status,
+             url,
+             requestBodyValues,
+         });
+     }
+     catch (error) {
+         if (error instanceof Error) {
+             if (error.name === "AbortError" || error instanceof ApiCallError) {
+                 throw error;
+             }
+         }
+         throw new ApiCallError({
+             message: responseBody,
+             cause: error,
+             statusCode: response.status,
+             url,
+             requestBodyValues,
+         });
+     }
+ };
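The `try`/`catch` above is scaffolding for the TODO'd structured error parsing: once the body is parsed into a typed error, parse failures fall through to a generic `ApiCallError` with `cause`. A hedged sketch of the same `ResponseHandler<ApiCallError>` pattern for a hypothetical provider that already returns JSON errors (`myProviderErrorSchema` and the handler name are assumptions, not part of this diff):

```ts
import { z } from "zod";
import { ApiCallError } from "../../util/api/ApiCallError.js";
import { ResponseHandler } from "../../util/api/postToApi.js";

// Hypothetical error payload schema for an imaginary provider.
const myProviderErrorSchema = z.object({
  error: z.object({ message: z.string() }),
});

export const failedMyProviderCallResponseHandler: ResponseHandler<ApiCallError> =
  async ({ response, url, requestBodyValues }) => {
    const responseBody = await response.text();
    try {
      // Map the parsed provider error onto ApiCallError.
      const parsed = myProviderErrorSchema.parse(JSON.parse(responseBody));
      return new ApiCallError({
        message: parsed.error.message,
        statusCode: response.status,
        url,
        requestBodyValues,
      });
    } catch (error) {
      // Parse failures degrade to the raw body, following the ElevenLabs handler.
      return new ApiCallError({
        message: responseBody,
        cause: error,
        statusCode: response.status,
        url,
        requestBodyValues,
      });
    }
  };
```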
package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.cjs ADDED
@@ -0,0 +1,88 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.ElevenLabsSpeechSynthesisModel = void 0;
+ const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
+ const callWithRetryAndThrottle_js_1 = require("../../util/api/callWithRetryAndThrottle.cjs");
+ const postToApi_js_1 = require("../../util/api/postToApi.cjs");
+ const ElevenLabsError_js_1 = require("./ElevenLabsError.cjs");
+ class ElevenLabsSpeechSynthesisModel extends AbstractModel_js_1.AbstractModel {
+     constructor(settings) {
+         super({ settings });
+         Object.defineProperty(this, "provider", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: "elevenlabs"
+         });
+         Object.defineProperty(this, "modelName", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: null
+         });
+     }
+     get apiKey() {
+         const apiKey = this.settings.apiKey ?? process.env.ELEVENLABS_API_KEY;
+         if (apiKey == null) {
+             throw new Error("No ElevenLabs API key provided. Pass it in the constructor or set the ELEVENLABS_API_KEY environment variable.");
+         }
+         return apiKey;
+     }
+     async callAPI(text, options) {
+         const run = options?.run;
+         const settings = options?.settings;
+         const combinedSettings = {
+             ...this.settings,
+             ...settings,
+         };
+         return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
+             retry: this.settings.retry,
+             throttle: this.settings.throttle,
+             call: async () => callElevenLabsTextToSpeechAPI({
+                 baseUrl: combinedSettings.baseUrl,
+                 abortSignal: run?.abortSignal,
+                 apiKey: this.apiKey,
+                 text,
+                 voiceId: combinedSettings.voice,
+                 modelId: combinedSettings.model,
+                 voiceSettings: combinedSettings.voiceSettings,
+             }),
+         });
+     }
+     generateSpeechResponse(text, options) {
+         return this.callAPI(text, options);
+     }
+     withSettings(additionalSettings) {
+         return new ElevenLabsSpeechSynthesisModel({
+             ...this.settings,
+             ...additionalSettings,
+         });
+     }
+ }
+ exports.ElevenLabsSpeechSynthesisModel = ElevenLabsSpeechSynthesisModel;
+ /**
+  * @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
+  */
+ async function callElevenLabsTextToSpeechAPI({ baseUrl = "https://api.elevenlabs.io/v1", abortSignal, apiKey, text, voiceId, modelId, voiceSettings, }) {
+     return (0, postToApi_js_1.postJsonToApi)({
+         url: `${baseUrl}/text-to-speech/${voiceId}`,
+         headers: {
+             "xi-api-key": apiKey,
+         },
+         body: {
+             text,
+             model_id: modelId,
+             voice_settings: voiceSettings != null
+                 ? {
+                     stability: voiceSettings.stability,
+                     similarity_boost: voiceSettings.similarityBoost,
+                     style: voiceSettings.style,
+                     use_speaker_boost: voiceSettings.useSpeakerBoost,
+                 }
+                 : undefined,
+         },
+         failedResponseHandler: ElevenLabsError_js_1.failedElevenLabsCallResponseHandler,
+         successfulResponseHandler: (0, postToApi_js_1.createAudioMpegResponseHandler)(),
+         abortSignal,
+     });
+ }
package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.d.ts ADDED
@@ -0,0 +1,29 @@
+ /// <reference types="node" resolution-mode="require"/>
+ import { AbstractModel } from "../../model-function/AbstractModel.js";
+ import { FunctionOptions } from "../../model-function/FunctionOptions.js";
+ import { SpeechSynthesisModel, SpeechSynthesisModelSettings } from "../../model-function/synthesize-speech/SpeechSynthesisModel.js";
+ import { RetryFunction } from "../../util/api/RetryFunction.js";
+ import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
+ export interface ElevenLabsSpeechSynthesisModelSettings extends SpeechSynthesisModelSettings {
+     voice: string;
+     baseUrl?: string;
+     apiKey?: string;
+     model?: string;
+     voiceSettings?: {
+         stability: number;
+         similarityBoost: number;
+         style?: number;
+         useSpeakerBoost?: boolean;
+     };
+     retry?: RetryFunction;
+     throttle?: ThrottleFunction;
+ }
+ export declare class ElevenLabsSpeechSynthesisModel extends AbstractModel<ElevenLabsSpeechSynthesisModelSettings> implements SpeechSynthesisModel<ElevenLabsSpeechSynthesisModelSettings> {
+     constructor(settings: ElevenLabsSpeechSynthesisModelSettings);
+     readonly provider = "elevenlabs";
+     readonly modelName: null;
+     private get apiKey();
+     private callAPI;
+     generateSpeechResponse(text: string, options?: FunctionOptions<ElevenLabsSpeechSynthesisModelSettings> | undefined): Promise<Buffer>;
+     withSettings(additionalSettings: Partial<ElevenLabsSpeechSynthesisModelSettings>): this;
+ }
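A usage sketch for the settings above; the voice ID is the one from the README example, and the camelCase `voiceSettings` are mapped to the API's snake_case fields by `callElevenLabsTextToSpeechAPI`:

```ts
import fs from "node:fs/promises";
import { ElevenLabsSpeechSynthesisModel, synthesizeSpeech } from "modelfusion";

// Relies on the ELEVENLABS_API_KEY environment variable (see the apiKey getter).
const model = new ElevenLabsSpeechSynthesisModel({
  voice: "ErXwobaYiN019PkySvjV",
  voiceSettings: {
    stability: 0.5, // sent as `stability`
    similarityBoost: 0.75, // sent as `similarity_boost`
  },
  // retry and throttle accept the library's RetryFunction / ThrottleFunction.
});

const speech = await synthesizeSpeech(model, "Hello, World!");
await fs.writeFile("hello.mp3", speech); // audio/mpeg Buffer
```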
package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.js ADDED
@@ -0,0 +1,84 @@
+ import { AbstractModel } from "../../model-function/AbstractModel.js";
+ import { callWithRetryAndThrottle } from "../../util/api/callWithRetryAndThrottle.js";
+ import { createAudioMpegResponseHandler, postJsonToApi, } from "../../util/api/postToApi.js";
+ import { failedElevenLabsCallResponseHandler } from "./ElevenLabsError.js";
+ export class ElevenLabsSpeechSynthesisModel extends AbstractModel {
+     constructor(settings) {
+         super({ settings });
+         Object.defineProperty(this, "provider", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: "elevenlabs"
+         });
+         Object.defineProperty(this, "modelName", {
+             enumerable: true,
+             configurable: true,
+             writable: true,
+             value: null
+         });
+     }
+     get apiKey() {
+         const apiKey = this.settings.apiKey ?? process.env.ELEVENLABS_API_KEY;
+         if (apiKey == null) {
+             throw new Error("No ElevenLabs API key provided. Pass it in the constructor or set the ELEVENLABS_API_KEY environment variable.");
+         }
+         return apiKey;
+     }
+     async callAPI(text, options) {
+         const run = options?.run;
+         const settings = options?.settings;
+         const combinedSettings = {
+             ...this.settings,
+             ...settings,
+         };
+         return callWithRetryAndThrottle({
+             retry: this.settings.retry,
+             throttle: this.settings.throttle,
+             call: async () => callElevenLabsTextToSpeechAPI({
+                 baseUrl: combinedSettings.baseUrl,
+                 abortSignal: run?.abortSignal,
+                 apiKey: this.apiKey,
+                 text,
+                 voiceId: combinedSettings.voice,
+                 modelId: combinedSettings.model,
+                 voiceSettings: combinedSettings.voiceSettings,
+             }),
+         });
+     }
+     generateSpeechResponse(text, options) {
+         return this.callAPI(text, options);
+     }
+     withSettings(additionalSettings) {
+         return new ElevenLabsSpeechSynthesisModel({
+             ...this.settings,
+             ...additionalSettings,
+         });
+     }
+ }
+ /**
+  * @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
+  */
+ async function callElevenLabsTextToSpeechAPI({ baseUrl = "https://api.elevenlabs.io/v1", abortSignal, apiKey, text, voiceId, modelId, voiceSettings, }) {
+     return postJsonToApi({
+         url: `${baseUrl}/text-to-speech/${voiceId}`,
+         headers: {
+             "xi-api-key": apiKey,
+         },
+         body: {
+             text,
+             model_id: modelId,
+             voice_settings: voiceSettings != null
+                 ? {
+                     stability: voiceSettings.stability,
+                     similarity_boost: voiceSettings.similarityBoost,
+                     style: voiceSettings.style,
+                     use_speaker_boost: voiceSettings.useSpeakerBoost,
+                 }
+                 : undefined,
+         },
+         failedResponseHandler: failedElevenLabsCallResponseHandler,
+         successfulResponseHandler: createAudioMpegResponseHandler(),
+         abortSignal,
+     });
+ }
package/model-provider/elevenlabs/index.cjs ADDED
@@ -0,0 +1,17 @@
+ "use strict";
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+     if (k2 === undefined) k2 = k;
+     var desc = Object.getOwnPropertyDescriptor(m, k);
+     if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+         desc = { enumerable: true, get: function() { return m[k]; } };
+     }
+     Object.defineProperty(o, k2, desc);
+ }) : (function(o, m, k, k2) {
+     if (k2 === undefined) k2 = k;
+     o[k2] = m[k];
+ }));
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
+     for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
+ };
+ Object.defineProperty(exports, "__esModule", { value: true });
+ __exportStar(require("./ElevenLabsSpeechSynthesisModel.cjs"), exports);
package/model-provider/elevenlabs/index.d.ts ADDED
@@ -0,0 +1 @@
+ export * from "./ElevenLabsSpeechSynthesisModel.js";
package/model-provider/elevenlabs/index.js ADDED
@@ -0,0 +1 @@
+ export * from "./ElevenLabsSpeechSynthesisModel.js";
package/model-provider/index.cjs CHANGED
@@ -16,6 +16,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
  Object.defineProperty(exports, "__esModule", { value: true });
  __exportStar(require("./automatic1111/index.cjs"), exports);
  __exportStar(require("./cohere/index.cjs"), exports);
+ __exportStar(require("./elevenlabs/index.cjs"), exports);
  __exportStar(require("./huggingface/index.cjs"), exports);
  __exportStar(require("./llamacpp/index.cjs"), exports);
  __exportStar(require("./openai/index.cjs"), exports);
package/model-provider/index.d.ts CHANGED
@@ -1,5 +1,6 @@
  export * from "./automatic1111/index.js";
  export * from "./cohere/index.js";
+ export * from "./elevenlabs/index.js";
  export * from "./huggingface/index.js";
  export * from "./llamacpp/index.js";
  export * from "./openai/index.js";
package/model-provider/index.js CHANGED
@@ -1,5 +1,6 @@
  export * from "./automatic1111/index.js";
  export * from "./cohere/index.js";
+ export * from "./elevenlabs/index.js";
  export * from "./huggingface/index.js";
  export * from "./llamacpp/index.js";
  export * from "./openai/index.js";
package/model-provider/openai/OpenAITranscriptionModel.d.ts CHANGED
@@ -2,7 +2,7 @@
  import z from "zod";
  import { AbstractModel } from "../../model-function/AbstractModel.js";
  import { FunctionOptions } from "../../model-function/FunctionOptions.js";
- import { TranscriptionModel, TranscriptionModelSettings } from "../../model-function/transcribe-audio/TranscriptionModel.js";
+ import { TranscriptionModel, TranscriptionModelSettings } from "../../model-function/transcribe-speech/TranscriptionModel.js";
  import { RetryFunction } from "../../util/api/RetryFunction.js";
  import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
  import { ResponseHandler } from "../../util/api/postToApi.js";
package/package.json CHANGED
@@ -1,7 +1,7 @@
  {
    "name": "modelfusion",
    "description": "Build AI applications, chatbots, and agents with JavaScript and TypeScript.",
-   "version": "0.16.0",
+   "version": "0.18.0",
    "author": "Lars Grammel",
    "license": "MIT",
    "keywords": [
package/util/api/postToApi.cjs CHANGED
@@ -1,6 +1,6 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.postToApi = exports.postJsonToApi = exports.createTextResponseHandler = exports.createJsonResponseHandler = void 0;
+ exports.postToApi = exports.postJsonToApi = exports.createAudioMpegResponseHandler = exports.createTextResponseHandler = exports.createJsonResponseHandler = void 0;
  const ApiCallError_js_1 = require("./ApiCallError.cjs");
  const createJsonResponseHandler = (responseSchema) => async ({ response, url, requestBodyValues }) => {
      const parsedResult = responseSchema.safeParse(await response.json());
@@ -18,6 +18,19 @@ const createJsonResponseHandler = (responseSchema) => async ({ response, url, re
  exports.createJsonResponseHandler = createJsonResponseHandler;
  const createTextResponseHandler = () => async ({ response }) => response.text();
  exports.createTextResponseHandler = createTextResponseHandler;
+ const createAudioMpegResponseHandler = () => async ({ response, url, requestBodyValues }) => {
+     if (response.headers.get("Content-Type") !== "audio/mpeg") {
+         throw new ApiCallError_js_1.ApiCallError({
+             message: "Invalid Content-Type (must be audio/mpeg)",
+             statusCode: response.status,
+             url,
+             requestBodyValues,
+         });
+     }
+     const arrayBuffer = await response.arrayBuffer();
+     return Buffer.from(arrayBuffer);
+ };
+ exports.createAudioMpegResponseHandler = createAudioMpegResponseHandler;
  const postJsonToApi = async ({ url, headers, body, failedResponseHandler, successfulResponseHandler, abortSignal, }) => (0, exports.postToApi)({
      url,
      headers: {
package/util/api/postToApi.d.ts CHANGED
@@ -1,3 +1,4 @@
+ /// <reference types="node" resolution-mode="require"/>
  import { z } from "zod";
  import { ApiCallError } from "./ApiCallError.js";
  export type ResponseHandler<T> = (options: {
@@ -7,6 +8,7 @@ export type ResponseHandler<T> = (options: {
  }) => PromiseLike<T>;
  export declare const createJsonResponseHandler: <T>(responseSchema: z.ZodType<T, z.ZodTypeDef, T>) => ResponseHandler<T>;
  export declare const createTextResponseHandler: () => ResponseHandler<string>;
+ export declare const createAudioMpegResponseHandler: () => ResponseHandler<Buffer>;
  export declare const postJsonToApi: <T>({ url, headers, body, failedResponseHandler, successfulResponseHandler, abortSignal, }: {
      url: string;
      headers?: Record<string, string> | undefined;
package/util/api/postToApi.js CHANGED
@@ -13,6 +13,18 @@ export const createJsonResponseHandler = (responseSchema) => async ({ response,
      return parsedResult.data;
  };
  export const createTextResponseHandler = () => async ({ response }) => response.text();
+ export const createAudioMpegResponseHandler = () => async ({ response, url, requestBodyValues }) => {
+     if (response.headers.get("Content-Type") !== "audio/mpeg") {
+         throw new ApiCallError({
+             message: "Invalid Content-Type (must be audio/mpeg)",
+             statusCode: response.status,
+             url,
+             requestBodyValues,
+         });
+     }
+     const arrayBuffer = await response.arrayBuffer();
+     return Buffer.from(arrayBuffer);
+ };
  export const postJsonToApi = async ({ url, headers, body, failedResponseHandler, successfulResponseHandler, abortSignal, }) => postToApi({
      url,
      headers: {
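`createAudioMpegResponseHandler` composes with `postJsonToApi` the same way the ElevenLabs model uses it. A sketch for a hypothetical provider endpoint (`https://api.example.com/tts`, its body shape, and both function names are assumptions):

```ts
import { ApiCallError } from "./ApiCallError.js";
import {
  createAudioMpegResponseHandler,
  postJsonToApi,
  ResponseHandler,
} from "./postToApi.js";

// Hypothetical failed-response handler: wraps the raw body in an ApiCallError.
const failedExampleCallResponseHandler: ResponseHandler<ApiCallError> = async ({
  response,
  url,
  requestBodyValues,
}) =>
  new ApiCallError({
    message: await response.text(),
    statusCode: response.status,
    url,
    requestBodyValues,
  });

// POSTs JSON and resolves to a Buffer, rejecting unless Content-Type is audio/mpeg.
async function fetchExampleSpeech(text: string, abortSignal?: AbortSignal) {
  return postJsonToApi({
    url: "https://api.example.com/tts",
    body: { text },
    failedResponseHandler: failedExampleCallResponseHandler,
    successfulResponseHandler: createAudioMpegResponseHandler(),
    abortSignal,
  });
}
```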