modelfusion 0.47.3 → 0.49.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (125)
  1. package/README.md +55 -33
  2. package/core/getRun.cjs +5 -3
  3. package/core/getRun.js +5 -3
  4. package/index.cjs +1 -0
  5. package/index.d.ts +1 -0
  6. package/index.js +1 -0
  7. package/model-function/AsyncIterableResultPromise.cjs +5 -5
  8. package/model-function/AsyncIterableResultPromise.d.ts +3 -3
  9. package/model-function/AsyncIterableResultPromise.js +5 -5
  10. package/model-function/Model.d.ts +1 -1
  11. package/model-function/ModelCallEvent.d.ts +5 -7
  12. package/model-function/embed/embed.cjs +3 -3
  13. package/model-function/embed/embed.js +3 -3
  14. package/model-function/{executeCall.cjs → executeStandardCall.cjs} +3 -3
  15. package/model-function/{executeCall.d.ts → executeStandardCall.d.ts} +1 -1
  16. package/model-function/{executeCall.js → executeStandardCall.js} +1 -1
  17. package/model-function/executeStreamCall.cjs +132 -0
  18. package/model-function/executeStreamCall.d.ts +20 -0
  19. package/model-function/executeStreamCall.js +128 -0
  20. package/model-function/generate-image/generateImage.cjs +2 -2
  21. package/model-function/generate-image/generateImage.js +2 -2
  22. package/model-function/generate-speech/SpeechGenerationEvent.d.ts +27 -0
  23. package/model-function/generate-speech/SpeechGenerationModel.d.ts +15 -0
  24. package/model-function/{synthesize-speech/synthesizeSpeech.cjs → generate-speech/generateSpeech.cjs} +7 -7
  25. package/model-function/{synthesize-speech/synthesizeSpeech.d.ts → generate-speech/generateSpeech.d.ts} +2 -2
  26. package/model-function/{synthesize-speech/synthesizeSpeech.js → generate-speech/generateSpeech.js} +5 -5
  27. package/model-function/generate-speech/index.cjs +20 -0
  28. package/model-function/generate-speech/index.d.ts +4 -0
  29. package/model-function/generate-speech/index.js +4 -0
  30. package/model-function/generate-speech/streamSpeech.cjs +34 -0
  31. package/model-function/generate-speech/streamSpeech.d.ts +8 -0
  32. package/model-function/generate-speech/streamSpeech.js +30 -0
  33. package/model-function/generate-structure/generateStructure.cjs +2 -2
  34. package/model-function/generate-structure/generateStructure.js +2 -2
  35. package/model-function/generate-structure/generateStructureOrText.cjs +2 -2
  36. package/model-function/generate-structure/generateStructureOrText.js +2 -2
  37. package/model-function/generate-structure/index.cjs +27 -0
  38. package/model-function/generate-structure/index.d.ts +11 -0
  39. package/model-function/generate-structure/index.js +11 -0
  40. package/model-function/generate-structure/streamStructure.cjs +28 -136
  41. package/model-function/generate-structure/streamStructure.js +27 -135
  42. package/model-function/generate-text/TextGenerationEvent.d.ts +6 -0
  43. package/model-function/generate-text/generateText.cjs +3 -3
  44. package/model-function/generate-text/generateText.d.ts +1 -1
  45. package/model-function/generate-text/generateText.js +3 -3
  46. package/model-function/generate-text/index.cjs +0 -1
  47. package/model-function/generate-text/index.d.ts +0 -1
  48. package/model-function/generate-text/index.js +0 -1
  49. package/model-function/generate-text/streamText.cjs +21 -128
  50. package/model-function/generate-text/streamText.js +20 -127
  51. package/model-function/generate-text/trimChatPrompt.cjs +1 -1
  52. package/model-function/generate-text/trimChatPrompt.d.ts +1 -1
  53. package/model-function/generate-text/trimChatPrompt.js +1 -1
  54. package/model-function/{transcribe-speech/transcribe.cjs → generate-transcription/generateTranscription.cjs} +6 -6
  55. package/model-function/{transcribe-speech/transcribe.d.ts → generate-transcription/generateTranscription.d.ts} +2 -2
  56. package/model-function/{transcribe-speech/transcribe.js → generate-transcription/generateTranscription.js} +4 -4
  57. package/model-function/index.cjs +5 -20
  58. package/model-function/index.d.ts +5 -20
  59. package/model-function/index.js +5 -20
  60. package/model-provider/elevenlabs/ElevenLabsApiConfiguration.cjs +3 -0
  61. package/model-provider/elevenlabs/ElevenLabsApiConfiguration.d.ts +1 -0
  62. package/model-provider/elevenlabs/ElevenLabsApiConfiguration.js +3 -0
  63. package/model-provider/elevenlabs/ElevenLabsSpeechModel.cjs +191 -0
  64. package/model-provider/elevenlabs/ElevenLabsSpeechModel.d.ts +39 -0
  65. package/model-provider/elevenlabs/ElevenLabsSpeechModel.js +187 -0
  66. package/model-provider/elevenlabs/index.cjs +1 -1
  67. package/model-provider/elevenlabs/index.d.ts +1 -1
  68. package/model-provider/elevenlabs/index.js +1 -1
  69. package/model-provider/huggingface/HuggingFaceImageDescriptionModel.cjs +21 -2
  70. package/model-provider/huggingface/HuggingFaceImageDescriptionModel.d.ts +11 -6
  71. package/model-provider/huggingface/HuggingFaceImageDescriptionModel.js +21 -2
  72. package/model-provider/lmnt/{LmntSpeechSynthesisModel.cjs → LmntSpeechModel.cjs} +5 -5
  73. package/model-provider/lmnt/LmntSpeechModel.d.ts +26 -0
  74. package/model-provider/lmnt/{LmntSpeechSynthesisModel.js → LmntSpeechModel.js} +3 -3
  75. package/model-provider/lmnt/index.cjs +1 -1
  76. package/model-provider/lmnt/index.d.ts +1 -1
  77. package/model-provider/lmnt/index.js +1 -1
  78. package/model-provider/openai/{OpenAITextGenerationModel.cjs → OpenAICompletionModel.cjs} +17 -17
  79. package/model-provider/openai/{OpenAITextGenerationModel.d.ts → OpenAICompletionModel.d.ts} +25 -25
  80. package/model-provider/openai/{OpenAITextGenerationModel.js → OpenAICompletionModel.js} +12 -12
  81. package/model-provider/openai/OpenAICostCalculator.cjs +3 -3
  82. package/model-provider/openai/OpenAICostCalculator.js +3 -3
  83. package/model-provider/openai/OpenAITranscriptionModel.d.ts +1 -1
  84. package/model-provider/openai/TikTokenTokenizer.d.ts +2 -2
  85. package/model-provider/openai/index.cjs +1 -1
  86. package/model-provider/openai/index.d.ts +1 -1
  87. package/model-provider/openai/index.js +1 -1
  88. package/package.json +3 -1
  89. package/ui/MediaSourceAppender.cjs +54 -0
  90. package/ui/MediaSourceAppender.d.ts +11 -0
  91. package/ui/MediaSourceAppender.js +50 -0
  92. package/ui/index.cjs +17 -0
  93. package/ui/index.d.ts +1 -0
  94. package/ui/index.js +1 -0
  95. package/util/SimpleWebSocket.cjs +41 -0
  96. package/util/SimpleWebSocket.d.ts +12 -0
  97. package/util/SimpleWebSocket.js +14 -0
  98. package/model-function/describe-image/ImageDescriptionEvent.d.ts +0 -18
  99. package/model-function/describe-image/ImageDescriptionModel.d.ts +0 -10
  100. package/model-function/describe-image/describeImage.cjs +0 -26
  101. package/model-function/describe-image/describeImage.d.ts +0 -9
  102. package/model-function/describe-image/describeImage.js +0 -22
  103. package/model-function/generate-text/TextStreamingEvent.cjs +0 -2
  104. package/model-function/generate-text/TextStreamingEvent.d.ts +0 -7
  105. package/model-function/generate-text/TextStreamingEvent.js +0 -1
  106. package/model-function/synthesize-speech/SpeechSynthesisEvent.cjs +0 -2
  107. package/model-function/synthesize-speech/SpeechSynthesisEvent.d.ts +0 -21
  108. package/model-function/synthesize-speech/SpeechSynthesisEvent.js +0 -1
  109. package/model-function/synthesize-speech/SpeechSynthesisModel.cjs +0 -2
  110. package/model-function/synthesize-speech/SpeechSynthesisModel.d.ts +0 -11
  111. package/model-function/synthesize-speech/SpeechSynthesisModel.js +0 -1
  112. package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.cjs +0 -79
  113. package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.d.ts +0 -30
  114. package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.js +0 -75
  115. package/model-provider/lmnt/LmntSpeechSynthesisModel.d.ts +0 -26
  116. /package/model-function/{describe-image/ImageDescriptionEvent.cjs → generate-speech/SpeechGenerationEvent.cjs} +0 -0
  117. /package/model-function/{describe-image/ImageDescriptionEvent.js → generate-speech/SpeechGenerationEvent.js} +0 -0
  118. /package/model-function/{describe-image/ImageDescriptionModel.cjs → generate-speech/SpeechGenerationModel.cjs} +0 -0
  119. /package/model-function/{describe-image/ImageDescriptionModel.js → generate-speech/SpeechGenerationModel.js} +0 -0
  120. /package/model-function/{transcribe-speech → generate-transcription}/TranscriptionEvent.cjs +0 -0
  121. /package/model-function/{transcribe-speech → generate-transcription}/TranscriptionEvent.d.ts +0 -0
  122. /package/model-function/{transcribe-speech → generate-transcription}/TranscriptionEvent.js +0 -0
  123. /package/model-function/{transcribe-speech → generate-transcription}/TranscriptionModel.cjs +0 -0
  124. /package/model-function/{transcribe-speech → generate-transcription}/TranscriptionModel.d.ts +0 -0
  125. /package/model-function/{transcribe-speech → generate-transcription}/TranscriptionModel.js +0 -0
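The renames in this release are easiest to see from the file list above: `transcribe` becomes `generateTranscription`, `synthesizeSpeech` becomes `generateSpeech` (with a new `streamSpeech` counterpart), and `OpenAITextGenerationModel` becomes `OpenAICompletionModel`. As a hedged sketch only, inferred from the type signatures and JSDoc examples in the hunks below, the renamed calls might look like this; the `data` field name and the `generateSpeech` argument order are assumptions, and the voice id is a placeholder:

```ts
import {
  generateTranscription,
  generateSpeech,
  OpenAITranscriptionModel,
  ElevenLabsSpeechModel,
} from "modelfusion";
import fs from "node:fs";

// was: transcribe(...) in 0.47.x
const transcription = await generateTranscription(
  new OpenAITranscriptionModel({ model: "whisper-1" }),
  { type: "mp3", data: await fs.promises.readFile("data/test.mp3") } // "data" key assumed
);

// was: synthesizeSpeech(...) in 0.47.x; argument shape assumed
const speech = await generateSpeech(
  new ElevenLabsSpeechModel({ voice: "YOUR_VOICE_ID" }),
  "Hello, world!"
);
```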
@@ -1,133 +1,26 @@
- import { nanoid as createId } from "nanoid";
- import { FunctionEventSource } from "../../core/FunctionEventSource.js";
- import { getGlobalFunctionLogging } from "../../core/GlobalFunctionLogging.js";
- import { getGlobalFunctionObservers } from "../../core/GlobalFunctionObservers.js";
- import { AbortError } from "../../core/api/AbortError.js";
- import { getFunctionCallLogger } from "../../core/getFunctionCallLogger.js";
- import { getRun } from "../../core/getRun.js";
- import { startDurationMeasurement } from "../../util/DurationMeasurement.js";
- import { runSafe } from "../../util/runSafe.js";
  import { AsyncIterableResultPromise } from "../AsyncIterableResultPromise.js";
+ import { executeStreamCall } from "../executeStreamCall.js";
  export function streamText(model, prompt, options) {
- return new AsyncIterableResultPromise(doStreamText(model, prompt, options));
- }
- async function doStreamText(model, prompt, options) {
- const run = await getRun(options?.run);
- const eventSource = new FunctionEventSource({
- observers: [
- ...getFunctionCallLogger(options?.logging ?? getGlobalFunctionLogging()),
- ...getGlobalFunctionObservers(),
- ...(model.settings.observers ?? []),
- ...(run?.functionObserver != null ? [run.functionObserver] : []),
- ...(options?.observers ?? []),
- ],
- errorHandler: run?.errorHandler,
- });
- const durationMeasurement = startDurationMeasurement();
- const startMetadata = {
+ let accumulatedText = "";
+ let lastFullDelta;
+ return new AsyncIterableResultPromise(executeStreamCall({
  functionType: "text-streaming",
- callId: `call-${createId()}`,
- runId: run?.runId,
- sessionId: run?.sessionId,
- userId: run?.userId,
- functionId: options?.functionId,
- model: model.modelInformation,
- settings: model.settingsForEvent,
  input: prompt,
- timestamp: durationMeasurement.startDate,
- startTimestamp: durationMeasurement.startDate,
- };
- eventSource.notify({
- eventType: "started",
- ...startMetadata,
- });
- const result = await runSafe(async () => {
- const deltaIterable = await model.doStreamText(prompt, {
- functionId: options?.functionId,
- logging: options?.logging,
- observers: options?.observers,
- run,
- });
- return (async function* () {
- let accumulatedText = "";
- let lastFullDelta;
- for await (const event of deltaIterable) {
- if (event?.type === "error") {
- const error = event.error;
- const finishMetadata = {
- eventType: "finished",
- ...startMetadata,
- finishTimestamp: new Date(),
- durationInMs: durationMeasurement.durationInMs,
- };
- eventSource.notify(error instanceof AbortError
- ? {
- ...finishMetadata,
- result: {
- status: "abort",
- },
- }
- : {
- ...finishMetadata,
- result: {
- status: "error",
- error,
- },
- });
- throw error;
- }
- if (event?.type === "delta") {
- lastFullDelta = event.fullDelta;
- const textDelta = event.valueDelta;
- if (textDelta != null && textDelta.length > 0) {
- accumulatedText += textDelta;
- yield textDelta;
- }
- }
+ model,
+ options,
+ startStream: async (options) => model.doStreamText(prompt, options),
+ processDelta: (delta) => {
+ lastFullDelta = delta.fullDelta;
+ const textDelta = delta.valueDelta;
+ if (textDelta != null && textDelta.length > 0) {
+ accumulatedText += textDelta;
+ return textDelta;
  }
- const finishMetadata = {
- eventType: "finished",
- ...startMetadata,
- finishTimestamp: new Date(),
- durationInMs: durationMeasurement.durationInMs,
- };
- eventSource.notify({
- ...finishMetadata,
- result: {
- status: "success",
- response: lastFullDelta,
- value: accumulatedText,
- },
- });
- })();
- });
- if (!result.ok) {
- const finishMetadata = {
- eventType: "finished",
- ...startMetadata,
- finishTimestamp: new Date(),
- durationInMs: durationMeasurement.durationInMs,
- };
- if (result.isAborted) {
- eventSource.notify({
- ...finishMetadata,
- result: {
- status: "abort",
- },
- });
- throw new AbortError();
- }
- eventSource.notify({
- ...finishMetadata,
- result: {
- status: "error",
- error: result.error,
- },
- });
- throw result.error;
- }
- return {
- output: result.value,
- metadata: startMetadata,
- };
+ return undefined;
+ },
+ getResult: () => ({
+ response: lastFullDelta,
+ value: accumulatedText,
+ }),
+ }));
  }
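The rewritten streamText delegates the event bookkeeping to the new executeStreamCall helper. Its expected argument shape can be read off the call site above; the following is a sketch under that assumption, with generics and field types guessed rather than taken from executeStreamCall.d.ts:

```ts
// Sketch only: names come from the streamText call site above; types are assumptions.
type StreamDelta<CHUNK> = { fullDelta: unknown; valueDelta: CHUNK };

interface ExecuteStreamCallArgs<CHUNK, VALUE> {
  functionType: string;                 // e.g. "text-streaming"
  input: unknown;                       // the prompt
  model: unknown;                       // the streaming-capable model
  options?: unknown;                    // FunctionOptions (run, logging, observers, ...)
  // opens the provider stream, e.g. model.doStreamText(prompt, options)
  startStream: (options?: unknown) => Promise<AsyncIterable<unknown>>;
  // maps a raw delta to the value to yield; undefined means "emit nothing for this delta"
  processDelta: (delta: StreamDelta<CHUNK>) => VALUE | undefined;
  // produces the final response/value reported when the stream finishes
  getResult: () => { response: unknown; value: VALUE };
}
```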
@@ -10,7 +10,7 @@ const validateChatPrompt_js_1 = require("./validateChatPrompt.cjs");
  * When the minimal chat prompt (system message + last user message) is already too long, it will only
  * return this minimal chat prompt.
  *
- * @see https://modelfusion.dev/guide/function/generate-text/prompt-format#limiting-the-chat-length
+ * @see https://modelfusion.dev/guide/function/generate-text#limiting-the-chat-length
  */
  async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
  (model.settings.maxCompletionTokens ?? model.contextWindowSize / 4), }) {
@@ -8,7 +8,7 @@ import { HasContextWindowSize, HasTokenizer, TextGenerationModel, TextGeneration
  * When the minimal chat prompt (system message + last user message) is already too long, it will only
  * return this minimal chat prompt.
  *
- * @see https://modelfusion.dev/guide/function/generate-text/prompt-format#limiting-the-chat-length
+ * @see https://modelfusion.dev/guide/function/generate-text#limiting-the-chat-length
  */
  export declare function trimChatPrompt({ prompt, model, tokenLimit, }: {
  prompt: ChatPrompt;
@@ -7,7 +7,7 @@ import { validateChatPrompt } from "./validateChatPrompt.js";
  * When the minimal chat prompt (system message + last user message) is already too long, it will only
  * return this minimal chat prompt.
  *
- * @see https://modelfusion.dev/guide/function/generate-text/prompt-format#limiting-the-chat-length
+ * @see https://modelfusion.dev/guide/function/generate-text#limiting-the-chat-length
  */
  export async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
  (model.settings.maxCompletionTokens ?? model.contextWindowSize / 4), }) {
@@ -1,7 +1,7 @@
  "use strict";
  Object.defineProperty(exports, "__esModule", { value: true });
- exports.transcribe = void 0;
- const executeCall_js_1 = require("../executeCall.cjs");
+ exports.generateTranscription = void 0;
+ const executeStandardCall_js_1 = require("../executeStandardCall.cjs");
  const ModelFunctionPromise_js_1 = require("../ModelFunctionPromise.cjs");
  /**
  * Transcribe audio data into text.
@@ -9,7 +9,7 @@ const ModelFunctionPromise_js_1 = require("../ModelFunctionPromise.cjs");
  * @example
  * const data = await fs.promises.readFile("data/test.mp3");
  *
- * const transcription = await transcribe(
+ * const transcription = await generateTranscription(
  * new OpenAITranscriptionModel({ model: "whisper-1" }),
  * {
  * type: "mp3",
@@ -17,8 +17,8 @@ const ModelFunctionPromise_js_1 = require("../ModelFunctionPromise.cjs");
  * }
  * );
  */
- function transcribe(model, data, options) {
- return new ModelFunctionPromise_js_1.ModelFunctionPromise((0, executeCall_js_1.executeCall)({
+ function generateTranscription(model, data, options) {
+ return new ModelFunctionPromise_js_1.ModelFunctionPromise((0, executeStandardCall_js_1.executeStandardCall)({
  functionType: "transcription",
  input: data,
  model,
@@ -32,4 +32,4 @@ function transcribe(model, data, options) {
  },
  }));
  }
- exports.transcribe = transcribe;
+ exports.generateTranscription = generateTranscription;
@@ -7,7 +7,7 @@ import { TranscriptionModel, TranscriptionModelSettings } from "./TranscriptionM
  * @example
  * const data = await fs.promises.readFile("data/test.mp3");
  *
- * const transcription = await transcribe(
+ * const transcription = await generateTranscription(
  * new OpenAITranscriptionModel({ model: "whisper-1" }),
  * {
  * type: "mp3",
@@ -15,4 +15,4 @@ import { TranscriptionModel, TranscriptionModelSettings } from "./TranscriptionM
  * }
  * );
  */
- export declare function transcribe<DATA>(model: TranscriptionModel<DATA, TranscriptionModelSettings>, data: DATA, options?: FunctionOptions): ModelFunctionPromise<string>;
+ export declare function generateTranscription<DATA>(model: TranscriptionModel<DATA, TranscriptionModelSettings>, data: DATA, options?: FunctionOptions): ModelFunctionPromise<string>;
@@ -1,4 +1,4 @@
- import { executeCall } from "../executeCall.js";
+ import { executeStandardCall } from "../executeStandardCall.js";
  import { ModelFunctionPromise } from "../ModelFunctionPromise.js";
  /**
  * Transcribe audio data into text.
@@ -6,7 +6,7 @@ import { ModelFunctionPromise } from "../ModelFunctionPromise.js";
  * @example
  * const data = await fs.promises.readFile("data/test.mp3");
  *
- * const transcription = await transcribe(
+ * const transcription = await generateTranscription(
  * new OpenAITranscriptionModel({ model: "whisper-1" }),
  * {
  * type: "mp3",
@@ -14,8 +14,8 @@ import { ModelFunctionPromise } from "../ModelFunctionPromise.js";
  * }
  * );
  */
- export function transcribe(model, data, options) {
- return new ModelFunctionPromise(executeCall({
+ export function generateTranscription(model, data, options) {
+ return new ModelFunctionPromise(executeStandardCall({
  functionType: "transcription",
  input: data,
  model,
@@ -21,9 +21,6 @@ __exportStar(require("./ModelCallMetadata.cjs"), exports);
  __exportStar(require("./ModelInformation.cjs"), exports);
  __exportStar(require("./PromptFormat.cjs"), exports);
  __exportStar(require("./SuccessfulModelCall.cjs"), exports);
- __exportStar(require("./describe-image/ImageDescriptionEvent.cjs"), exports);
- __exportStar(require("./describe-image/ImageDescriptionModel.cjs"), exports);
- __exportStar(require("./describe-image/describeImage.cjs"), exports);
  __exportStar(require("./embed/EmbeddingEvent.cjs"), exports);
  __exportStar(require("./embed/EmbeddingModel.cjs"), exports);
  __exportStar(require("./embed/embed.cjs"), exports);
@@ -31,23 +28,11 @@ __exportStar(require("./generate-image/ImageGenerationEvent.cjs"), exports);
  __exportStar(require("./generate-image/ImageGenerationModel.cjs"), exports);
  __exportStar(require("./generate-image/PromptFormatImageGenerationModel.cjs"), exports);
  __exportStar(require("./generate-image/generateImage.cjs"), exports);
- __exportStar(require("./generate-structure/NoSuchStructureError.cjs"), exports);
- __exportStar(require("./generate-structure/StructureFromTextGenerationModel.cjs"), exports);
- __exportStar(require("./generate-structure/StructureGenerationEvent.cjs"), exports);
- __exportStar(require("./generate-structure/StructureGenerationModel.cjs"), exports);
- __exportStar(require("./generate-structure/StructureOrTextGenerationModel.cjs"), exports);
- __exportStar(require("./generate-structure/StructureParseError.cjs"), exports);
- __exportStar(require("./generate-structure/StructureStreamingEvent.cjs"), exports);
- __exportStar(require("./generate-structure/StructureValidationError.cjs"), exports);
- __exportStar(require("./generate-structure/generateStructure.cjs"), exports);
- __exportStar(require("./generate-structure/generateStructureOrText.cjs"), exports);
- __exportStar(require("./generate-structure/streamStructure.cjs"), exports);
+ __exportStar(require("./generate-speech/index.cjs"), exports);
+ __exportStar(require("./generate-structure/index.cjs"), exports);
  __exportStar(require("./generate-text/index.cjs"), exports);
- __exportStar(require("./synthesize-speech/SpeechSynthesisEvent.cjs"), exports);
- __exportStar(require("./synthesize-speech/SpeechSynthesisModel.cjs"), exports);
- __exportStar(require("./synthesize-speech/synthesizeSpeech.cjs"), exports);
+ __exportStar(require("./generate-transcription/TranscriptionEvent.cjs"), exports);
+ __exportStar(require("./generate-transcription/TranscriptionModel.cjs"), exports);
+ __exportStar(require("./generate-transcription/generateTranscription.cjs"), exports);
  __exportStar(require("./tokenize-text/Tokenizer.cjs"), exports);
  __exportStar(require("./tokenize-text/countTokens.cjs"), exports);
- __exportStar(require("./transcribe-speech/TranscriptionEvent.cjs"), exports);
- __exportStar(require("./transcribe-speech/TranscriptionModel.cjs"), exports);
- __exportStar(require("./transcribe-speech/transcribe.cjs"), exports);
@@ -5,9 +5,6 @@ export * from "./ModelCallMetadata.js";
  export * from "./ModelInformation.js";
  export * from "./PromptFormat.js";
  export * from "./SuccessfulModelCall.js";
- export * from "./describe-image/ImageDescriptionEvent.js";
- export * from "./describe-image/ImageDescriptionModel.js";
- export * from "./describe-image/describeImage.js";
  export * from "./embed/EmbeddingEvent.js";
  export * from "./embed/EmbeddingModel.js";
  export * from "./embed/embed.js";
@@ -15,23 +12,11 @@ export * from "./generate-image/ImageGenerationEvent.js";
  export * from "./generate-image/ImageGenerationModel.js";
  export * from "./generate-image/PromptFormatImageGenerationModel.js";
  export * from "./generate-image/generateImage.js";
- export * from "./generate-structure/NoSuchStructureError.js";
- export * from "./generate-structure/StructureFromTextGenerationModel.js";
- export * from "./generate-structure/StructureGenerationEvent.js";
- export * from "./generate-structure/StructureGenerationModel.js";
- export * from "./generate-structure/StructureOrTextGenerationModel.js";
- export * from "./generate-structure/StructureParseError.js";
- export * from "./generate-structure/StructureStreamingEvent.js";
- export * from "./generate-structure/StructureValidationError.js";
- export * from "./generate-structure/generateStructure.js";
- export * from "./generate-structure/generateStructureOrText.js";
- export * from "./generate-structure/streamStructure.js";
+ export * from "./generate-speech/index.js";
+ export * from "./generate-structure/index.js";
  export * from "./generate-text/index.js";
- export * from "./synthesize-speech/SpeechSynthesisEvent.js";
- export * from "./synthesize-speech/SpeechSynthesisModel.js";
- export * from "./synthesize-speech/synthesizeSpeech.js";
+ export * from "./generate-transcription/TranscriptionEvent.js";
+ export * from "./generate-transcription/TranscriptionModel.js";
+ export * from "./generate-transcription/generateTranscription.js";
  export * from "./tokenize-text/Tokenizer.js";
  export * from "./tokenize-text/countTokens.js";
- export * from "./transcribe-speech/TranscriptionEvent.js";
- export * from "./transcribe-speech/TranscriptionModel.js";
- export * from "./transcribe-speech/transcribe.js";
@@ -5,9 +5,6 @@ export * from "./ModelCallMetadata.js";
  export * from "./ModelInformation.js";
  export * from "./PromptFormat.js";
  export * from "./SuccessfulModelCall.js";
- export * from "./describe-image/ImageDescriptionEvent.js";
- export * from "./describe-image/ImageDescriptionModel.js";
- export * from "./describe-image/describeImage.js";
  export * from "./embed/EmbeddingEvent.js";
  export * from "./embed/EmbeddingModel.js";
  export * from "./embed/embed.js";
@@ -15,23 +12,11 @@ export * from "./generate-image/ImageGenerationEvent.js";
  export * from "./generate-image/ImageGenerationModel.js";
  export * from "./generate-image/PromptFormatImageGenerationModel.js";
  export * from "./generate-image/generateImage.js";
- export * from "./generate-structure/NoSuchStructureError.js";
- export * from "./generate-structure/StructureFromTextGenerationModel.js";
- export * from "./generate-structure/StructureGenerationEvent.js";
- export * from "./generate-structure/StructureGenerationModel.js";
- export * from "./generate-structure/StructureOrTextGenerationModel.js";
- export * from "./generate-structure/StructureParseError.js";
- export * from "./generate-structure/StructureStreamingEvent.js";
- export * from "./generate-structure/StructureValidationError.js";
- export * from "./generate-structure/generateStructure.js";
- export * from "./generate-structure/generateStructureOrText.js";
- export * from "./generate-structure/streamStructure.js";
+ export * from "./generate-speech/index.js";
+ export * from "./generate-structure/index.js";
  export * from "./generate-text/index.js";
- export * from "./synthesize-speech/SpeechSynthesisEvent.js";
- export * from "./synthesize-speech/SpeechSynthesisModel.js";
- export * from "./synthesize-speech/synthesizeSpeech.js";
+ export * from "./generate-transcription/TranscriptionEvent.js";
+ export * from "./generate-transcription/TranscriptionModel.js";
+ export * from "./generate-transcription/generateTranscription.js";
  export * from "./tokenize-text/Tokenizer.js";
  export * from "./tokenize-text/countTokens.js";
- export * from "./transcribe-speech/TranscriptionEvent.js";
- export * from "./transcribe-speech/TranscriptionModel.js";
- export * from "./transcribe-speech/transcribe.js";
@@ -18,5 +18,8 @@ class ElevenLabsApiConfiguration extends BaseUrlApiConfiguration_js_1.BaseUrlApi
  throttle,
  });
  }
+ get apiKey() {
+ return this.headers["xi-api-key"];
+ }
  }
  exports.ElevenLabsApiConfiguration = ElevenLabsApiConfiguration;
@@ -8,4 +8,5 @@ export declare class ElevenLabsApiConfiguration extends BaseUrlApiConfiguration
  retry?: RetryFunction;
  throttle?: ThrottleFunction;
  });
+ get apiKey(): string;
  }
@@ -15,4 +15,7 @@ export class ElevenLabsApiConfiguration extends BaseUrlApiConfiguration {
  throttle,
  });
  }
+ get apiKey() {
+ return this.headers["xi-api-key"];
+ }
  }
@@ -0,0 +1,191 @@
+ "use strict";
+ Object.defineProperty(exports, "__esModule", { value: true });
+ exports.ElevenLabsSpeechModel = void 0;
+ const zod_1 = require("zod");
+ const callWithRetryAndThrottle_js_1 = require("../../core/api/callWithRetryAndThrottle.cjs");
+ const postToApi_js_1 = require("../../core/api/postToApi.cjs");
+ const AsyncQueue_js_1 = require("../../event-source/AsyncQueue.cjs");
+ const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
+ const SimpleWebSocket_js_1 = require("../../util/SimpleWebSocket.cjs");
+ const parseJSON_js_1 = require("../../util/parseJSON.cjs");
+ const ElevenLabsApiConfiguration_js_1 = require("./ElevenLabsApiConfiguration.cjs");
+ const ElevenLabsError_js_1 = require("./ElevenLabsError.cjs");
+ const elevenLabsModels = [
+ "eleven_multilingual_v2",
+ "eleven_multilingual_v1",
+ "eleven_monolingual_v1",
+ ];
+ const defaultModel = "eleven_multilingual_v2";
+ /**
+ * Synthesize speech using the ElevenLabs Text to Speech API.
+ *
+ * @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
+ */
+ class ElevenLabsSpeechModel extends AbstractModel_js_1.AbstractModel {
+ constructor(settings) {
+ super({ settings });
+ Object.defineProperty(this, "provider", {
+ enumerable: true,
+ configurable: true,
+ writable: true,
+ value: "elevenlabs"
+ });
+ }
+ get modelName() {
+ return this.settings.voice;
+ }
+ async callAPI(text, options) {
+ return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
+ retry: this.settings.api?.retry,
+ throttle: this.settings.api?.throttle,
+ call: async () => callElevenLabsTextToSpeechAPI({
+ api: this.settings.api,
+ abortSignal: options?.run?.abortSignal,
+ text,
+ voiceId: this.settings.voice,
+ modelId: this.settings.model,
+ voiceSettings: this.settings.voiceSettings,
+ }),
+ });
+ }
+ get settingsForEvent() {
+ return {
+ model: this.settings.model,
+ voice: this.settings.voice,
+ voiceSettings: this.settings.voiceSettings,
+ };
+ }
+ doGenerateSpeechStandard(text, options) {
+ return this.callAPI(text, options);
+ }
+ async doGenerateSpeechStreamDuplex(textStream
+ // options?: FunctionOptions | undefined
+ ) {
+ const responseSchema = zod_1.z.union([
+ zod_1.z.object({
+ audio: zod_1.z.string(),
+ isFinal: zod_1.z.literal(false).nullable(),
+ normalizedAlignment: zod_1.z
+ .object({
+ chars: zod_1.z.array(zod_1.z.string()),
+ charStartTimesMs: zod_1.z.array(zod_1.z.number()),
+ charDurationsMs: zod_1.z.array(zod_1.z.number()),
+ })
+ .nullable(),
+ }),
+ zod_1.z.object({
+ isFinal: zod_1.z.literal(true),
+ }),
+ zod_1.z.object({
+ message: zod_1.z.string(),
+ error: zod_1.z.string(),
+ code: zod_1.z.number(),
+ }),
+ ]);
+ const queue = new AsyncQueue_js_1.AsyncQueue();
+ const model = this.settings.model ?? defaultModel;
+ const socket = await (0, SimpleWebSocket_js_1.createSimpleWebSocket)(`wss://api.elevenlabs.io/v1/text-to-speech/${this.settings.voice}/stream-input?model_id=${model}`);
+ socket.onopen = async () => {
+ const api = this.settings.api ?? new ElevenLabsApiConfiguration_js_1.ElevenLabsApiConfiguration();
+ // send begin-of-stream (BOS) message:
+ socket.send(JSON.stringify({
+ // The JS WebSocket API does not support authorization headers, so we send the API key in the BOS message.
+ // See https://stackoverflow.com/questions/4361173/http-headers-in-websockets-client-api
+ xi_api_key: api.apiKey,
+ text: " ",
+ voice_settings: toApiVoiceSettings(this.settings.voiceSettings),
+ generation_config: toGenerationConfig(this.settings.generationConfig),
+ }));
+ // send text in chunks:
+ let textBuffer = "";
+ for await (const textDelta of textStream) {
+ textBuffer += textDelta;
+ // using ". " as separator: sending in full sentences improves the quality
+ // of the audio output significantly.
+ const separator = textBuffer.lastIndexOf(". ");
+ if (separator === -1) {
+ continue;
+ }
+ const textToProcess = textBuffer.slice(0, separator);
+ textBuffer = textBuffer.slice(separator + 1);
+ socket.send(JSON.stringify({
+ text: textToProcess,
+ try_trigger_generation: true,
+ }));
+ }
+ // send remaining text:
+ if (textBuffer.length > 0) {
+ socket.send(JSON.stringify({
+ text: `${textBuffer} `,
+ try_trigger_generation: true,
+ }));
+ }
+ // send end-of-stream (EOS) message:
+ socket.send(JSON.stringify({ text: "" }));
+ };
+ socket.onmessage = (event) => {
+ const parseResult = (0, parseJSON_js_1.safeParseJsonWithZod)(event.data, responseSchema);
+ if (!parseResult.success) {
+ queue.push({ type: "error", error: parseResult.error });
+ return;
+ }
+ const response = parseResult.data;
+ if ("error" in response) {
+ queue.push({ type: "error", error: response });
+ return;
+ }
+ if (!response.isFinal) {
+ queue.push({
+ type: "delta",
+ fullDelta: event,
+ valueDelta: Buffer.from(response.audio, "base64"),
+ });
+ }
+ };
+ socket.onerror = (error) => {
+ queue.push({ type: "error", error });
+ };
+ socket.onclose = () => {
+ queue.close();
+ };
+ return queue;
+ }
+ withSettings(additionalSettings) {
+ return new ElevenLabsSpeechModel({
+ ...this.settings,
+ ...additionalSettings,
+ });
+ }
+ }
+ exports.ElevenLabsSpeechModel = ElevenLabsSpeechModel;
+ async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfiguration_js_1.ElevenLabsApiConfiguration(), abortSignal, text, voiceId, modelId, voiceSettings, }) {
+ return (0, postToApi_js_1.postJsonToApi)({
+ url: api.assembleUrl(`/text-to-speech/${voiceId}`),
+ headers: api.headers,
+ body: {
+ text,
+ model_id: modelId ?? defaultModel,
+ voice_settings: toApiVoiceSettings(voiceSettings),
+ },
+ failedResponseHandler: ElevenLabsError_js_1.failedElevenLabsCallResponseHandler,
+ successfulResponseHandler: (0, postToApi_js_1.createAudioMpegResponseHandler)(),
+ abortSignal,
+ });
+ }
+ function toApiVoiceSettings(voiceSettings) {
+ return voiceSettings != null
+ ? {
+ stability: voiceSettings.stability,
+ similarity_boost: voiceSettings.similarityBoost,
+ style: voiceSettings.style,
+ use_speaker_boost: voiceSettings.useSpeakerBoost,
+ }
+ : undefined;
+ }
+ function toGenerationConfig(generationConfig) {
+ return generationConfig != null
+ ? {
+ chunk_length_schedule: generationConfig.chunkLengthSchedule,
+ }
+ : undefined;
+ }
@@ -0,0 +1,39 @@
+ /// <reference types="node" />
+ import { FunctionOptions } from "../../core/FunctionOptions.js";
+ import { ApiConfiguration } from "../../core/api/ApiConfiguration.js";
+ import { AbstractModel } from "../../model-function/AbstractModel.js";
+ import { Delta } from "../../model-function/Delta.js";
+ import { StreamingSpeechGenerationModel, SpeechGenerationModelSettings } from "../../model-function/generate-speech/SpeechGenerationModel.js";
+ declare const elevenLabsModels: readonly ["eleven_multilingual_v2", "eleven_multilingual_v1", "eleven_monolingual_v1"];
+ export interface ElevenLabsSpeechModelSettings extends SpeechGenerationModelSettings {
+ api?: ApiConfiguration & {
+ apiKey: string;
+ };
+ voice: string;
+ model?: (typeof elevenLabsModels)[number] | (string & {});
+ voiceSettings?: {
+ stability: number;
+ similarityBoost: number;
+ style?: number;
+ useSpeakerBoost?: boolean;
+ };
+ generationConfig?: {
+ chunkLengthSchedule: number[];
+ };
+ }
+ /**
+ * Synthesize speech using the ElevenLabs Text to Speech API.
+ *
+ * @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
+ */
+ export declare class ElevenLabsSpeechModel extends AbstractModel<ElevenLabsSpeechModelSettings> implements StreamingSpeechGenerationModel<ElevenLabsSpeechModelSettings> {
+ constructor(settings: ElevenLabsSpeechModelSettings);
+ readonly provider = "elevenlabs";
+ get modelName(): string;
+ private callAPI;
+ get settingsForEvent(): Partial<ElevenLabsSpeechModelSettings>;
+ doGenerateSpeechStandard(text: string, options?: FunctionOptions): Promise<Buffer>;
+ doGenerateSpeechStreamDuplex(textStream: AsyncIterable<string>): Promise<AsyncIterable<Delta<Buffer>>>;
+ withSettings(additionalSettings: Partial<ElevenLabsSpeechModelSettings>): this;
+ }
+ export {};
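Taken together with the streamSpeech files added above, duplex speech streaming plausibly chains a text stream into the ElevenLabs websocket model. The sketch below assumes streamSpeech(model, textStream) mirrors streamText's call shape and yields audio Buffers (matching doGenerateSpeechStreamDuplex above); these argument shapes are not confirmed by this diff, and the voice id is a placeholder:

```ts
import {
  streamText,
  streamSpeech,
  OpenAICompletionModel,
  ElevenLabsSpeechModel,
} from "modelfusion";

const textStream = await streamText(
  new OpenAICompletionModel({ model: "gpt-3.5-turbo-instruct" }),
  "Write a short greeting."
);

const speechStream = await streamSpeech(
  new ElevenLabsSpeechModel({ voice: "YOUR_VOICE_ID" }),
  textStream
);

for await (const audioPart of speechStream) {
  // each chunk is an audio Buffer (base64-decoded MP3 fragments from the websocket)
  process.stdout.write(audioPart);
}
```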