modelfusion 0.15.0 → 0.17.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43):
  1. package/README.md +18 -4
  2. package/model-function/ModelCallEvent.d.ts +4 -3
  3. package/model-function/SuccessfulModelCall.cjs +1 -0
  4. package/model-function/SuccessfulModelCall.d.ts +1 -1
  5. package/model-function/SuccessfulModelCall.js +1 -0
  6. package/model-function/index.cjs +6 -3
  7. package/model-function/index.d.ts +6 -3
  8. package/model-function/index.js +6 -3
  9. package/model-function/synthesize-speech/SpeechSynthesisEvent.d.ts +22 -0
  10. package/model-function/synthesize-speech/SpeechSynthesisModel.d.ts +11 -0
  11. package/model-function/synthesize-speech/synthesizeSpeech.cjs +49 -0
  12. package/model-function/synthesize-speech/synthesizeSpeech.d.ts +13 -0
  13. package/model-function/synthesize-speech/synthesizeSpeech.js +45 -0
  14. package/model-function/transcribe-speech/TranscriptionEvent.cjs +2 -0
  15. package/model-function/transcribe-speech/TranscriptionEvent.js +1 -0
  16. package/model-function/transcribe-speech/TranscriptionModel.cjs +2 -0
  17. package/model-function/transcribe-speech/TranscriptionModel.js +1 -0
  18. package/model-provider/elevenlabs/ElevenLabsError.cjs +31 -0
  19. package/model-provider/elevenlabs/ElevenLabsError.d.ts +3 -0
  20. package/model-provider/elevenlabs/ElevenLabsError.js +27 -0
  21. package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.cjs +77 -0
  22. package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.d.ts +22 -0
  23. package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.js +73 -0
  24. package/model-provider/elevenlabs/index.cjs +17 -0
  25. package/model-provider/elevenlabs/index.d.ts +1 -0
  26. package/model-provider/elevenlabs/index.js +1 -0
  27. package/model-provider/index.cjs +1 -0
  28. package/model-provider/index.d.ts +1 -0
  29. package/model-provider/index.js +1 -0
  30. package/model-provider/openai/OpenAITranscriptionModel.d.ts +1 -1
  31. package/package.json +1 -1
  32. package/util/api/postToApi.cjs +14 -1
  33. package/util/api/postToApi.d.ts +2 -0
  34. package/util/api/postToApi.js +12 -0
  35. /package/model-function/{transcribe-audio/TranscriptionEvent.cjs → synthesize-speech/SpeechSynthesisEvent.cjs} +0 -0
  36. /package/model-function/{transcribe-audio/TranscriptionEvent.js → synthesize-speech/SpeechSynthesisEvent.js} +0 -0
  37. /package/model-function/{transcribe-audio/TranscriptionModel.cjs → synthesize-speech/SpeechSynthesisModel.cjs} +0 -0
  38. /package/model-function/{transcribe-audio/TranscriptionModel.js → synthesize-speech/SpeechSynthesisModel.js} +0 -0
  39. /package/model-function/{transcribe-audio → transcribe-speech}/TranscriptionEvent.d.ts +0 -0
  40. /package/model-function/{transcribe-audio → transcribe-speech}/TranscriptionModel.d.ts +0 -0
  41. /package/model-function/{transcribe-audio → transcribe-speech}/transcribe.cjs +0 -0
  42. /package/model-function/{transcribe-audio → transcribe-speech}/transcribe.d.ts +0 -0
  43. /package/model-function/{transcribe-audio → transcribe-speech}/transcribe.js +0 -0
package/README.md CHANGED
@@ -260,9 +260,9 @@ const { tool, parameters, result, text } = await useToolOrGenerateText(
260
260
  );
261
261
  ```
262
262
 
263
- ### [Transcribe Audio](https://modelfusion.dev/guide/function/transcribe-audio)
263
+ ### [Transcribe Speech](https://modelfusion.dev/guide/function/transcribe-speech)
264
264
 
265
- Turn audio (voice) into text.
265
+ Turn speech (audio) into text.
266
266
 
267
267
  ```ts
268
268
  const transcription = await transcribe(
@@ -274,6 +274,20 @@ const transcription = await transcribe(
274
274
  );
275
275
  ```
276
276
 
277
+ ### Synthesize Speech
278
+
279
+ Turn text into speech (audio).
280
+
281
+ ```ts
282
+ // `speech` is a Buffer with MP3 audio data
283
+ const speech = await synthesizeSpeech(
284
+ new ElevenLabsSpeechSynthesisModel({
285
+ voice: "ErXwobaYiN019PkySvjV",
286
+ }),
287
+ "Hello, World!"
288
+ );
289
+ ```
290
+
277
291
  ### [Generate Image](https://modelfusion.dev/guide/function/generate-image)
278
292
 
279
293
  Generate a base64-encoded image from a prompt.
@@ -356,7 +370,7 @@ const { chunks } = await retrieveTextChunks(
356
370
  - [Generate JSON or text](https://modelfusion.dev/guide/function/generate-json-or-text)
357
371
  - [Embed Text](https://modelfusion.dev/guide/function/embed-text)
358
372
  - [Tokenize Text](https://modelfusion.dev/guide/function/tokenize-text)
359
- - [Transcribe Audio](https://modelfusion.dev/guide/function/transcribe-audio)
373
+ - [Transcribe Speech](https://modelfusion.dev/guide/function/transcribe-speech)
360
374
  - [Generate images](https://modelfusion.dev/guide/function/generate-image)
361
375
  - Summarize text
362
376
  - [Tools](https://modelfusion.dev/guide/tools)
@@ -385,7 +399,7 @@ const { chunks } = await retrieveTextChunks(
385
399
  | [Embed text](https://modelfusion.dev/guide/function/embed-text) | ✅ | ✅ | ✅ | ✅ | | |
386
400
  | [Tokenize text](https://modelfusion.dev/guide/function/tokenize-text) | full | full | basic | | | |
387
401
  | [Generate image](https://modelfusion.dev/guide/function/generate-image) | ✅ | | | | ✅ | ✅ |
388
- | [Transcribe audio](https://modelfusion.dev/guide/function/transcribe-audio) | ✅ | | | | | |
402
+ | [Transcribe speech](https://modelfusion.dev/guide/function/transcribe-speech) | ✅ | | | | | |
389
403
  | [Cost calculation](https://modelfusion.dev/guide/run/cost-calculation) | ✅ | | | | | |
390
404
 
391
405
  ### Vector Indices
@@ -5,12 +5,13 @@ import { ImageGenerationFinishedEvent, ImageGenerationStartedEvent } from "./gen
5
5
  import { JsonGenerationFinishedEvent, JsonGenerationStartedEvent } from "./generate-json/JsonGenerationEvent.js";
6
6
  import { TextGenerationFinishedEvent, TextGenerationStartedEvent } from "./generate-text/TextGenerationEvent.js";
7
7
  import { TextStreamingFinishedEvent, TextStreamingStartedEvent } from "./generate-text/TextStreamingEvent.js";
8
- import { TranscriptionFinishedEvent, TranscriptionStartedEvent } from "./transcribe-audio/TranscriptionEvent.js";
8
+ import { SpeechSynthesisFinishedEvent, SpeechSynthesisStartedEvent } from "./synthesize-speech/SpeechSynthesisEvent.js";
9
+ import { TranscriptionFinishedEvent, TranscriptionStartedEvent } from "./transcribe-speech/TranscriptionEvent.js";
9
10
  export type ModelCallStartedEventMetadata = RunFunctionStartedEventMetadata & {
10
11
  model: ModelInformation;
11
12
  };
12
- export type ModelCallStartedEvent = ImageGenerationStartedEvent | JsonGenerationStartedEvent | TextEmbeddingStartedEvent | TextGenerationStartedEvent | TextStreamingStartedEvent | TranscriptionStartedEvent;
13
+ export type ModelCallStartedEvent = ImageGenerationStartedEvent | JsonGenerationStartedEvent | SpeechSynthesisStartedEvent | TextEmbeddingStartedEvent | TextGenerationStartedEvent | TextStreamingStartedEvent | TranscriptionStartedEvent;
13
14
  export type ModelCallFinishedEventMetadata = RunFunctionFinishedEventMetadata & {
14
15
  model: ModelInformation;
15
16
  };
16
- export type ModelCallFinishedEvent = ImageGenerationFinishedEvent | JsonGenerationFinishedEvent | TextEmbeddingFinishedEvent | TextGenerationFinishedEvent | TextStreamingFinishedEvent | TranscriptionFinishedEvent;
17
+ export type ModelCallFinishedEvent = ImageGenerationFinishedEvent | JsonGenerationFinishedEvent | SpeechSynthesisFinishedEvent | TextEmbeddingFinishedEvent | TextGenerationFinishedEvent | TextStreamingFinishedEvent | TranscriptionFinishedEvent;
@@ -18,6 +18,7 @@ const eventTypeToCostType = {
18
18
  "image-generation-finished": "image-generation",
19
19
  "json-generation-finished": "json-generation",
20
20
  "json-or-text-generation-finished": "json-or-text-generation",
21
+ "speech-synthesis-finished": "speech-synthesis",
21
22
  "text-embedding-finished": "text-embedding",
22
23
  "text-generation-finished": "text-generation",
23
24
  "text-streaming-finished": "text-streaming",
@@ -1,7 +1,7 @@
1
1
  import { RunFunctionEvent } from "../run/RunFunctionEvent.js";
2
2
  import { ModelInformation } from "./ModelInformation.js";
3
3
  export type SuccessfulModelCall = {
4
- type: "image-generation" | "json-generation" | "json-or-text-generation" | "text-embedding" | "text-generation" | "text-streaming" | "transcription";
4
+ type: "image-generation" | "json-generation" | "json-or-text-generation" | "speech-synthesis" | "text-embedding" | "text-generation" | "text-streaming" | "transcription";
5
5
  model: ModelInformation;
6
6
  settings: unknown;
7
7
  response: unknown;
@@ -14,6 +14,7 @@ const eventTypeToCostType = {
14
14
  "image-generation-finished": "image-generation",
15
15
  "json-generation-finished": "json-generation",
16
16
  "json-or-text-generation-finished": "json-or-text-generation",
17
+ "speech-synthesis-finished": "speech-synthesis",
17
18
  "text-embedding-finished": "text-embedding",
18
19
  "text-generation-finished": "text-generation",
19
20
  "text-streaming-finished": "text-streaming",
@@ -41,8 +41,11 @@ __exportStar(require("./generate-text/TextGenerationEvent.cjs"), exports);
41
41
  __exportStar(require("./generate-text/TextGenerationModel.cjs"), exports);
42
42
  __exportStar(require("./generate-text/generateText.cjs"), exports);
43
43
  __exportStar(require("./generate-text/streamText.cjs"), exports);
44
+ __exportStar(require("./synthesize-speech/SpeechSynthesisEvent.cjs"), exports);
45
+ __exportStar(require("./synthesize-speech/SpeechSynthesisModel.cjs"), exports);
46
+ __exportStar(require("./synthesize-speech/synthesizeSpeech.cjs"), exports);
44
47
  __exportStar(require("./tokenize-text/Tokenizer.cjs"), exports);
45
48
  __exportStar(require("./tokenize-text/countTokens.cjs"), exports);
46
- __exportStar(require("./transcribe-audio/TranscriptionEvent.cjs"), exports);
47
- __exportStar(require("./transcribe-audio/TranscriptionModel.cjs"), exports);
48
- __exportStar(require("./transcribe-audio/transcribe.cjs"), exports);
49
+ __exportStar(require("./transcribe-speech/TranscriptionEvent.cjs"), exports);
50
+ __exportStar(require("./transcribe-speech/TranscriptionModel.cjs"), exports);
51
+ __exportStar(require("./transcribe-speech/transcribe.cjs"), exports);
@@ -25,8 +25,11 @@ export * from "./generate-text/TextGenerationEvent.js";
25
25
  export * from "./generate-text/TextGenerationModel.js";
26
26
  export * from "./generate-text/generateText.js";
27
27
  export * from "./generate-text/streamText.js";
28
+ export * from "./synthesize-speech/SpeechSynthesisEvent.js";
29
+ export * from "./synthesize-speech/SpeechSynthesisModel.js";
30
+ export * from "./synthesize-speech/synthesizeSpeech.js";
28
31
  export * from "./tokenize-text/Tokenizer.js";
29
32
  export * from "./tokenize-text/countTokens.js";
30
- export * from "./transcribe-audio/TranscriptionEvent.js";
31
- export * from "./transcribe-audio/TranscriptionModel.js";
32
- export * from "./transcribe-audio/transcribe.js";
33
+ export * from "./transcribe-speech/TranscriptionEvent.js";
34
+ export * from "./transcribe-speech/TranscriptionModel.js";
35
+ export * from "./transcribe-speech/transcribe.js";
@@ -25,8 +25,11 @@ export * from "./generate-text/TextGenerationEvent.js";
25
25
  export * from "./generate-text/TextGenerationModel.js";
26
26
  export * from "./generate-text/generateText.js";
27
27
  export * from "./generate-text/streamText.js";
28
+ export * from "./synthesize-speech/SpeechSynthesisEvent.js";
29
+ export * from "./synthesize-speech/SpeechSynthesisModel.js";
30
+ export * from "./synthesize-speech/synthesizeSpeech.js";
28
31
  export * from "./tokenize-text/Tokenizer.js";
29
32
  export * from "./tokenize-text/countTokens.js";
30
- export * from "./transcribe-audio/TranscriptionEvent.js";
31
- export * from "./transcribe-audio/TranscriptionModel.js";
32
- export * from "./transcribe-audio/transcribe.js";
33
+ export * from "./transcribe-speech/TranscriptionEvent.js";
34
+ export * from "./transcribe-speech/TranscriptionModel.js";
35
+ export * from "./transcribe-speech/transcribe.js";
@@ -0,0 +1,22 @@
1
+ /// <reference types="node" resolution-mode="require"/>
2
+ import { ModelCallFinishedEventMetadata, ModelCallStartedEventMetadata } from "../ModelCallEvent.js";
3
+ export type SpeechSynthesisStartedEvent = {
4
+ type: "speech-synthesis-started";
5
+ metadata: ModelCallStartedEventMetadata;
6
+ settings: unknown;
7
+ text: string;
8
+ };
9
+ export type SpeechSynthesisFinishedEvent = {
10
+ type: "speech-synthesis-finished";
11
+ metadata: ModelCallFinishedEventMetadata;
12
+ settings: unknown;
13
+ text: string;
14
+ } & ({
15
+ status: "success";
16
+ response: Buffer;
17
+ } | {
18
+ status: "failure";
19
+ error: unknown;
20
+ } | {
21
+ status: "abort";
22
+ });
@@ -0,0 +1,11 @@
1
+ /// <reference types="node" resolution-mode="require"/>
2
+ import { FunctionOptions } from "../FunctionOptions.js";
3
+ import { Model, ModelSettings } from "../Model.js";
4
+ export interface SpeechSynthesisModelSettings extends ModelSettings {
5
+ }
6
+ export interface SpeechSynthesisModel<SETTINGS> extends Model<SETTINGS> {
7
+ /**
8
+ * Generates an mp3 audio buffer that contains the speech for the given text.
9
+ */
10
+ generateSpeechResponse: (text: string, options?: FunctionOptions<SETTINGS>) => PromiseLike<Buffer>;
11
+ }
@@ -0,0 +1,49 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.synthesizeSpeech = void 0;
4
+ const executeCall_js_1 = require("../executeCall.cjs");
5
+ async function synthesizeSpeech(model, text, options) {
6
+ const result = await (0, executeCall_js_1.executeCall)({
7
+ model,
8
+ options,
9
+ generateResponse: (options) => model.generateSpeechResponse(text, options),
10
+ extractOutputValue: (buffer) => buffer,
11
+ getStartEvent: (metadata, settings) => ({
12
+ type: "speech-synthesis-started",
13
+ metadata,
14
+ settings,
15
+ text,
16
+ }),
17
+ getAbortEvent: (metadata, settings) => ({
18
+ type: "speech-synthesis-finished",
19
+ status: "abort",
20
+ settings,
21
+ metadata,
22
+ text,
23
+ }),
24
+ getFailureEvent: (metadata, settings, error) => ({
25
+ type: "speech-synthesis-finished",
26
+ status: "failure",
27
+ metadata,
28
+ settings,
29
+ text,
30
+ error,
31
+ }),
32
+ getSuccessEvent: (metadata, settings, response, output) => ({
33
+ type: "speech-synthesis-finished",
34
+ status: "success",
35
+ metadata,
36
+ settings,
37
+ text,
38
+ response,
39
+ speech: output,
40
+ }),
41
+ });
42
+ return options?.fullResponse === true
43
+ ? {
44
+ speech: result.output,
45
+ metadata: result.metadata,
46
+ }
47
+ : result.output;
48
+ }
49
+ exports.synthesizeSpeech = synthesizeSpeech;
@@ -0,0 +1,13 @@
1
+ /// <reference types="node" resolution-mode="require"/>
2
+ import { FunctionOptions } from "../FunctionOptions.js";
3
+ import { CallMetadata } from "../executeCall.js";
4
+ import { SpeechSynthesisModel, SpeechSynthesisModelSettings } from "./SpeechSynthesisModel.js";
5
+ export declare function synthesizeSpeech<SETTINGS extends SpeechSynthesisModelSettings>(model: SpeechSynthesisModel<SETTINGS>, text: string, options: FunctionOptions<SETTINGS> & {
6
+ fullResponse: true;
7
+ }): Promise<{
8
+ speech: Buffer;
9
+ metadata: CallMetadata<SpeechSynthesisModel<SETTINGS>>;
10
+ }>;
11
+ export declare function synthesizeSpeech<SETTINGS extends SpeechSynthesisModelSettings>(model: SpeechSynthesisModel<SETTINGS>, text: string, options?: FunctionOptions<SETTINGS> & {
12
+ fullResponse?: false;
13
+ }): Promise<Buffer>;
@@ -0,0 +1,45 @@
1
+ import { executeCall } from "../executeCall.js";
2
+ export async function synthesizeSpeech(model, text, options) {
3
+ const result = await executeCall({
4
+ model,
5
+ options,
6
+ generateResponse: (options) => model.generateSpeechResponse(text, options),
7
+ extractOutputValue: (buffer) => buffer,
8
+ getStartEvent: (metadata, settings) => ({
9
+ type: "speech-synthesis-started",
10
+ metadata,
11
+ settings,
12
+ text,
13
+ }),
14
+ getAbortEvent: (metadata, settings) => ({
15
+ type: "speech-synthesis-finished",
16
+ status: "abort",
17
+ settings,
18
+ metadata,
19
+ text,
20
+ }),
21
+ getFailureEvent: (metadata, settings, error) => ({
22
+ type: "speech-synthesis-finished",
23
+ status: "failure",
24
+ metadata,
25
+ settings,
26
+ text,
27
+ error,
28
+ }),
29
+ getSuccessEvent: (metadata, settings, response, output) => ({
30
+ type: "speech-synthesis-finished",
31
+ status: "success",
32
+ metadata,
33
+ settings,
34
+ text,
35
+ response,
36
+ speech: output,
37
+ }),
38
+ });
39
+ return options?.fullResponse === true
40
+ ? {
41
+ speech: result.output,
42
+ metadata: result.metadata,
43
+ }
44
+ : result.output;
45
+ }
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,2 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -0,0 +1,31 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.failedElevenLabsCallResponseHandler = void 0;
4
+ const ApiCallError_js_1 = require("../../util/api/ApiCallError.cjs");
5
+ const failedElevenLabsCallResponseHandler = async ({ response, url, requestBodyValues }) => {
6
+ const responseBody = await response.text();
7
+ try {
8
+ // TODO implement ElevenLabsError
9
+ return new ApiCallError_js_1.ApiCallError({
10
+ message: responseBody,
11
+ statusCode: response.status,
12
+ url,
13
+ requestBodyValues,
14
+ });
15
+ }
16
+ catch (error) {
17
+ if (error instanceof Error) {
18
+ if (error.name === "AbortError" || error instanceof ApiCallError_js_1.ApiCallError) {
19
+ throw error;
20
+ }
21
+ }
22
+ throw new ApiCallError_js_1.ApiCallError({
23
+ message: responseBody,
24
+ cause: error,
25
+ statusCode: response.status,
26
+ url,
27
+ requestBodyValues,
28
+ });
29
+ }
30
+ };
31
+ exports.failedElevenLabsCallResponseHandler = failedElevenLabsCallResponseHandler;
@@ -0,0 +1,3 @@
1
+ import { ApiCallError } from "../../util/api/ApiCallError.js";
2
+ import { ResponseHandler } from "../../util/api/postToApi.js";
3
+ export declare const failedElevenLabsCallResponseHandler: ResponseHandler<ApiCallError>;
@@ -0,0 +1,27 @@
1
+ import { ApiCallError } from "../../util/api/ApiCallError.js";
2
+ export const failedElevenLabsCallResponseHandler = async ({ response, url, requestBodyValues }) => {
3
+ const responseBody = await response.text();
4
+ try {
5
+ // TODO implement ElevenLabsError
6
+ return new ApiCallError({
7
+ message: responseBody,
8
+ statusCode: response.status,
9
+ url,
10
+ requestBodyValues,
11
+ });
12
+ }
13
+ catch (error) {
14
+ if (error instanceof Error) {
15
+ if (error.name === "AbortError" || error instanceof ApiCallError) {
16
+ throw error;
17
+ }
18
+ }
19
+ throw new ApiCallError({
20
+ message: responseBody,
21
+ cause: error,
22
+ statusCode: response.status,
23
+ url,
24
+ requestBodyValues,
25
+ });
26
+ }
27
+ };
@@ -0,0 +1,77 @@
1
+ "use strict";
2
+ Object.defineProperty(exports, "__esModule", { value: true });
3
+ exports.ElevenLabsSpeechSynthesisModel = void 0;
4
+ const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
5
+ const callWithRetryAndThrottle_js_1 = require("../../util/api/callWithRetryAndThrottle.cjs");
6
+ const postToApi_js_1 = require("../../util/api/postToApi.cjs");
7
+ const ElevenLabsError_js_1 = require("./ElevenLabsError.cjs");
8
+ class ElevenLabsSpeechSynthesisModel extends AbstractModel_js_1.AbstractModel {
9
+ constructor(settings) {
10
+ super({ settings });
11
+ Object.defineProperty(this, "provider", {
12
+ enumerable: true,
13
+ configurable: true,
14
+ writable: true,
15
+ value: "elevenlabs"
16
+ });
17
+ Object.defineProperty(this, "modelName", {
18
+ enumerable: true,
19
+ configurable: true,
20
+ writable: true,
21
+ value: null
22
+ });
23
+ }
24
+ get apiKey() {
25
+ const apiKey = this.settings.apiKey ?? process.env.ELEVENLABS_API_KEY;
26
+ if (apiKey == null) {
27
+ throw new Error("No ElevenLabs API key provided. Pass it in the constructor or set the ELEVENLABS_API_KEY environment variable.");
28
+ }
29
+ return apiKey;
30
+ }
31
+ async callAPI(text, options) {
32
+ const run = options?.run;
33
+ const settings = options?.settings;
34
+ const combinedSettings = {
35
+ ...this.settings,
36
+ ...settings,
37
+ };
38
+ return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
39
+ retry: this.settings.retry,
40
+ throttle: this.settings.throttle,
41
+ call: async () => callElevenLabsTextToSpeechAPI({
42
+ baseUrl: combinedSettings.baseUrl,
43
+ abortSignal: run?.abortSignal,
44
+ apiKey: this.apiKey,
45
+ text,
46
+ voiceId: combinedSettings.voice,
47
+ }),
48
+ });
49
+ }
50
+ generateSpeechResponse(text, options) {
51
+ return this.callAPI(text, options);
52
+ }
53
+ withSettings(additionalSettings) {
54
+ return new ElevenLabsSpeechSynthesisModel({
55
+ ...this.settings,
56
+ ...additionalSettings,
57
+ });
58
+ }
59
+ }
60
+ exports.ElevenLabsSpeechSynthesisModel = ElevenLabsSpeechSynthesisModel;
61
+ /**
62
+ * @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
63
+ */
64
+ async function callElevenLabsTextToSpeechAPI({ baseUrl = "https://api.elevenlabs.io/v1", abortSignal, apiKey, text, voiceId, }) {
65
+ return (0, postToApi_js_1.postJsonToApi)({
66
+ url: `${baseUrl}/text-to-speech/${voiceId}`,
67
+ headers: {
68
+ "xi-api-key": apiKey,
69
+ },
70
+ body: {
71
+ text,
72
+ },
73
+ failedResponseHandler: ElevenLabsError_js_1.failedElevenLabsCallResponseHandler,
74
+ successfulResponseHandler: (0, postToApi_js_1.createAudioMpegResponseHandler)(),
75
+ abortSignal,
76
+ });
77
+ }
@@ -0,0 +1,22 @@
1
+ /// <reference types="node" resolution-mode="require"/>
2
+ import { AbstractModel } from "../../model-function/AbstractModel.js";
3
+ import { FunctionOptions } from "../../model-function/FunctionOptions.js";
4
+ import { SpeechSynthesisModel, SpeechSynthesisModelSettings } from "../../model-function/synthesize-speech/SpeechSynthesisModel.js";
5
+ import { RetryFunction } from "../../util/api/RetryFunction.js";
6
+ import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
7
+ export interface ElevenLabsSpeechSynthesisModelSettings extends SpeechSynthesisModelSettings {
8
+ voice: string;
9
+ baseUrl?: string;
10
+ apiKey?: string;
11
+ retry?: RetryFunction;
12
+ throttle?: ThrottleFunction;
13
+ }
14
+ export declare class ElevenLabsSpeechSynthesisModel extends AbstractModel<ElevenLabsSpeechSynthesisModelSettings> implements SpeechSynthesisModel<ElevenLabsSpeechSynthesisModelSettings> {
15
+ constructor(settings: ElevenLabsSpeechSynthesisModelSettings);
16
+ readonly provider = "elevenlabs";
17
+ readonly modelName: null;
18
+ private get apiKey();
19
+ private callAPI;
20
+ generateSpeechResponse(text: string, options?: FunctionOptions<ElevenLabsSpeechSynthesisModelSettings> | undefined): Promise<Buffer>;
21
+ withSettings(additionalSettings: Partial<ElevenLabsSpeechSynthesisModelSettings>): this;
22
+ }
@@ -0,0 +1,73 @@
1
+ import { AbstractModel } from "../../model-function/AbstractModel.js";
2
+ import { callWithRetryAndThrottle } from "../../util/api/callWithRetryAndThrottle.js";
3
+ import { createAudioMpegResponseHandler, postJsonToApi, } from "../../util/api/postToApi.js";
4
+ import { failedElevenLabsCallResponseHandler } from "./ElevenLabsError.js";
5
+ export class ElevenLabsSpeechSynthesisModel extends AbstractModel {
6
+ constructor(settings) {
7
+ super({ settings });
8
+ Object.defineProperty(this, "provider", {
9
+ enumerable: true,
10
+ configurable: true,
11
+ writable: true,
12
+ value: "elevenlabs"
13
+ });
14
+ Object.defineProperty(this, "modelName", {
15
+ enumerable: true,
16
+ configurable: true,
17
+ writable: true,
18
+ value: null
19
+ });
20
+ }
21
+ get apiKey() {
22
+ const apiKey = this.settings.apiKey ?? process.env.ELEVENLABS_API_KEY;
23
+ if (apiKey == null) {
24
+ throw new Error("No ElevenLabs API key provided. Pass it in the constructor or set the ELEVENLABS_API_KEY environment variable.");
25
+ }
26
+ return apiKey;
27
+ }
28
+ async callAPI(text, options) {
29
+ const run = options?.run;
30
+ const settings = options?.settings;
31
+ const combinedSettings = {
32
+ ...this.settings,
33
+ ...settings,
34
+ };
35
+ return callWithRetryAndThrottle({
36
+ retry: this.settings.retry,
37
+ throttle: this.settings.throttle,
38
+ call: async () => callElevenLabsTextToSpeechAPI({
39
+ baseUrl: combinedSettings.baseUrl,
40
+ abortSignal: run?.abortSignal,
41
+ apiKey: this.apiKey,
42
+ text,
43
+ voiceId: combinedSettings.voice,
44
+ }),
45
+ });
46
+ }
47
+ generateSpeechResponse(text, options) {
48
+ return this.callAPI(text, options);
49
+ }
50
+ withSettings(additionalSettings) {
51
+ return new ElevenLabsSpeechSynthesisModel({
52
+ ...this.settings,
53
+ ...additionalSettings,
54
+ });
55
+ }
56
+ }
57
+ /**
58
+ * @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
59
+ */
60
+ async function callElevenLabsTextToSpeechAPI({ baseUrl = "https://api.elevenlabs.io/v1", abortSignal, apiKey, text, voiceId, }) {
61
+ return postJsonToApi({
62
+ url: `${baseUrl}/text-to-speech/${voiceId}`,
63
+ headers: {
64
+ "xi-api-key": apiKey,
65
+ },
66
+ body: {
67
+ text,
68
+ },
69
+ failedResponseHandler: failedElevenLabsCallResponseHandler,
70
+ successfulResponseHandler: createAudioMpegResponseHandler(),
71
+ abortSignal,
72
+ });
73
+ }
@@ -0,0 +1,17 @@
1
+ "use strict";
2
+ var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
+ if (k2 === undefined) k2 = k;
4
+ var desc = Object.getOwnPropertyDescriptor(m, k);
5
+ if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
+ desc = { enumerable: true, get: function() { return m[k]; } };
7
+ }
8
+ Object.defineProperty(o, k2, desc);
9
+ }) : (function(o, m, k, k2) {
10
+ if (k2 === undefined) k2 = k;
11
+ o[k2] = m[k];
12
+ }));
13
+ var __exportStar = (this && this.__exportStar) || function(m, exports) {
14
+ for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
15
+ };
16
+ Object.defineProperty(exports, "__esModule", { value: true });
17
+ __exportStar(require("./ElevenLabsSpeechSynthesisModel.cjs"), exports);
@@ -0,0 +1 @@
1
+ export * from "./ElevenLabsSpeechSynthesisModel.js";
@@ -0,0 +1 @@
1
+ export * from "./ElevenLabsSpeechSynthesisModel.js";
@@ -16,6 +16,7 @@ var __exportStar = (this && this.__exportStar) || function(m, exports) {
16
16
  Object.defineProperty(exports, "__esModule", { value: true });
17
17
  __exportStar(require("./automatic1111/index.cjs"), exports);
18
18
  __exportStar(require("./cohere/index.cjs"), exports);
19
+ __exportStar(require("./elevenlabs/index.cjs"), exports);
19
20
  __exportStar(require("./huggingface/index.cjs"), exports);
20
21
  __exportStar(require("./llamacpp/index.cjs"), exports);
21
22
  __exportStar(require("./openai/index.cjs"), exports);
@@ -1,5 +1,6 @@
1
1
  export * from "./automatic1111/index.js";
2
2
  export * from "./cohere/index.js";
3
+ export * from "./elevenlabs/index.js";
3
4
  export * from "./huggingface/index.js";
4
5
  export * from "./llamacpp/index.js";
5
6
  export * from "./openai/index.js";
@@ -1,5 +1,6 @@
1
1
  export * from "./automatic1111/index.js";
2
2
  export * from "./cohere/index.js";
3
+ export * from "./elevenlabs/index.js";
3
4
  export * from "./huggingface/index.js";
4
5
  export * from "./llamacpp/index.js";
5
6
  export * from "./openai/index.js";
@@ -2,7 +2,7 @@
2
2
  import z from "zod";
3
3
  import { AbstractModel } from "../../model-function/AbstractModel.js";
4
4
  import { FunctionOptions } from "../../model-function/FunctionOptions.js";
5
- import { TranscriptionModel, TranscriptionModelSettings } from "../../model-function/transcribe-audio/TranscriptionModel.js";
5
+ import { TranscriptionModel, TranscriptionModelSettings } from "../../model-function/transcribe-speech/TranscriptionModel.js";
6
6
  import { RetryFunction } from "../../util/api/RetryFunction.js";
7
7
  import { ThrottleFunction } from "../../util/api/ThrottleFunction.js";
8
8
  import { ResponseHandler } from "../../util/api/postToApi.js";
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "modelfusion",
3
3
  "description": "Build AI applications, chatbots, and agents with JavaScript and TypeScript.",
4
- "version": "0.15.0",
4
+ "version": "0.17.0",
5
5
  "author": "Lars Grammel",
6
6
  "license": "MIT",
7
7
  "keywords": [
@@ -1,6 +1,6 @@
1
1
  "use strict";
2
2
  Object.defineProperty(exports, "__esModule", { value: true });
3
- exports.postToApi = exports.postJsonToApi = exports.createTextResponseHandler = exports.createJsonResponseHandler = void 0;
3
+ exports.postToApi = exports.postJsonToApi = exports.createAudioMpegResponseHandler = exports.createTextResponseHandler = exports.createJsonResponseHandler = void 0;
4
4
  const ApiCallError_js_1 = require("./ApiCallError.cjs");
5
5
  const createJsonResponseHandler = (responseSchema) => async ({ response, url, requestBodyValues }) => {
6
6
  const parsedResult = responseSchema.safeParse(await response.json());
@@ -18,6 +18,19 @@ const createJsonResponseHandler = (responseSchema) => async ({ response, url, re
18
18
  exports.createJsonResponseHandler = createJsonResponseHandler;
19
19
  const createTextResponseHandler = () => async ({ response }) => response.text();
20
20
  exports.createTextResponseHandler = createTextResponseHandler;
21
+ const createAudioMpegResponseHandler = () => async ({ response, url, requestBodyValues }) => {
22
+ if (response.headers.get("Content-Type") !== "audio/mpeg") {
23
+ throw new ApiCallError_js_1.ApiCallError({
24
+ message: "Invalid Content-Type (must be audio/mpeg)",
25
+ statusCode: response.status,
26
+ url,
27
+ requestBodyValues,
28
+ });
29
+ }
30
+ const arrayBuffer = await response.arrayBuffer();
31
+ return Buffer.from(arrayBuffer);
32
+ };
33
+ exports.createAudioMpegResponseHandler = createAudioMpegResponseHandler;
21
34
  const postJsonToApi = async ({ url, headers, body, failedResponseHandler, successfulResponseHandler, abortSignal, }) => (0, exports.postToApi)({
22
35
  url,
23
36
  headers: {
@@ -1,3 +1,4 @@
1
+ /// <reference types="node" resolution-mode="require"/>
1
2
  import { z } from "zod";
2
3
  import { ApiCallError } from "./ApiCallError.js";
3
4
  export type ResponseHandler<T> = (options: {
@@ -7,6 +8,7 @@ export type ResponseHandler<T> = (options: {
7
8
  }) => PromiseLike<T>;
8
9
  export declare const createJsonResponseHandler: <T>(responseSchema: z.ZodType<T, z.ZodTypeDef, T>) => ResponseHandler<T>;
9
10
  export declare const createTextResponseHandler: () => ResponseHandler<string>;
11
+ export declare const createAudioMpegResponseHandler: () => ResponseHandler<Buffer>;
10
12
  export declare const postJsonToApi: <T>({ url, headers, body, failedResponseHandler, successfulResponseHandler, abortSignal, }: {
11
13
  url: string;
12
14
  headers?: Record<string, string> | undefined;
@@ -13,6 +13,18 @@ export const createJsonResponseHandler = (responseSchema) => async ({ response,
13
13
  return parsedResult.data;
14
14
  };
15
15
  export const createTextResponseHandler = () => async ({ response }) => response.text();
16
+ export const createAudioMpegResponseHandler = () => async ({ response, url, requestBodyValues }) => {
17
+ if (response.headers.get("Content-Type") !== "audio/mpeg") {
18
+ throw new ApiCallError({
19
+ message: "Invalid Content-Type (must be audio/mpeg)",
20
+ statusCode: response.status,
21
+ url,
22
+ requestBodyValues,
23
+ });
24
+ }
25
+ const arrayBuffer = await response.arrayBuffer();
26
+ return Buffer.from(arrayBuffer);
27
+ };
16
28
  export const postJsonToApi = async ({ url, headers, body, failedResponseHandler, successfulResponseHandler, abortSignal, }) => postToApi({
17
29
  url,
18
30
  headers: {