modelfusion 0.47.3 → 0.49.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -33
- package/core/getRun.cjs +5 -3
- package/core/getRun.js +5 -3
- package/index.cjs +1 -0
- package/index.d.ts +1 -0
- package/index.js +1 -0
- package/model-function/AsyncIterableResultPromise.cjs +5 -5
- package/model-function/AsyncIterableResultPromise.d.ts +3 -3
- package/model-function/AsyncIterableResultPromise.js +5 -5
- package/model-function/Model.d.ts +1 -1
- package/model-function/ModelCallEvent.d.ts +5 -7
- package/model-function/embed/embed.cjs +3 -3
- package/model-function/embed/embed.js +3 -3
- package/model-function/{executeCall.cjs → executeStandardCall.cjs} +3 -3
- package/model-function/{executeCall.d.ts → executeStandardCall.d.ts} +1 -1
- package/model-function/{executeCall.js → executeStandardCall.js} +1 -1
- package/model-function/executeStreamCall.cjs +132 -0
- package/model-function/executeStreamCall.d.ts +20 -0
- package/model-function/executeStreamCall.js +128 -0
- package/model-function/generate-image/generateImage.cjs +2 -2
- package/model-function/generate-image/generateImage.js +2 -2
- package/model-function/generate-speech/SpeechGenerationEvent.d.ts +27 -0
- package/model-function/generate-speech/SpeechGenerationModel.d.ts +15 -0
- package/model-function/{synthesize-speech/synthesizeSpeech.cjs → generate-speech/generateSpeech.cjs} +7 -7
- package/model-function/{synthesize-speech/synthesizeSpeech.d.ts → generate-speech/generateSpeech.d.ts} +2 -2
- package/model-function/{synthesize-speech/synthesizeSpeech.js → generate-speech/generateSpeech.js} +5 -5
- package/model-function/generate-speech/index.cjs +20 -0
- package/model-function/generate-speech/index.d.ts +4 -0
- package/model-function/generate-speech/index.js +4 -0
- package/model-function/generate-speech/streamSpeech.cjs +34 -0
- package/model-function/generate-speech/streamSpeech.d.ts +8 -0
- package/model-function/generate-speech/streamSpeech.js +30 -0
- package/model-function/generate-structure/generateStructure.cjs +2 -2
- package/model-function/generate-structure/generateStructure.js +2 -2
- package/model-function/generate-structure/generateStructureOrText.cjs +2 -2
- package/model-function/generate-structure/generateStructureOrText.js +2 -2
- package/model-function/generate-structure/index.cjs +27 -0
- package/model-function/generate-structure/index.d.ts +11 -0
- package/model-function/generate-structure/index.js +11 -0
- package/model-function/generate-structure/streamStructure.cjs +28 -136
- package/model-function/generate-structure/streamStructure.js +27 -135
- package/model-function/generate-text/TextGenerationEvent.d.ts +6 -0
- package/model-function/generate-text/generateText.cjs +3 -3
- package/model-function/generate-text/generateText.d.ts +1 -1
- package/model-function/generate-text/generateText.js +3 -3
- package/model-function/generate-text/index.cjs +0 -1
- package/model-function/generate-text/index.d.ts +0 -1
- package/model-function/generate-text/index.js +0 -1
- package/model-function/generate-text/streamText.cjs +21 -128
- package/model-function/generate-text/streamText.js +20 -127
- package/model-function/generate-text/trimChatPrompt.cjs +1 -1
- package/model-function/generate-text/trimChatPrompt.d.ts +1 -1
- package/model-function/generate-text/trimChatPrompt.js +1 -1
- package/model-function/{transcribe-speech/transcribe.cjs → generate-transcription/generateTranscription.cjs} +6 -6
- package/model-function/{transcribe-speech/transcribe.d.ts → generate-transcription/generateTranscription.d.ts} +2 -2
- package/model-function/{transcribe-speech/transcribe.js → generate-transcription/generateTranscription.js} +4 -4
- package/model-function/index.cjs +5 -20
- package/model-function/index.d.ts +5 -20
- package/model-function/index.js +5 -20
- package/model-provider/elevenlabs/ElevenLabsApiConfiguration.cjs +3 -0
- package/model-provider/elevenlabs/ElevenLabsApiConfiguration.d.ts +1 -0
- package/model-provider/elevenlabs/ElevenLabsApiConfiguration.js +3 -0
- package/model-provider/elevenlabs/ElevenLabsSpeechModel.cjs +191 -0
- package/model-provider/elevenlabs/ElevenLabsSpeechModel.d.ts +39 -0
- package/model-provider/elevenlabs/ElevenLabsSpeechModel.js +187 -0
- package/model-provider/elevenlabs/index.cjs +1 -1
- package/model-provider/elevenlabs/index.d.ts +1 -1
- package/model-provider/elevenlabs/index.js +1 -1
- package/model-provider/huggingface/HuggingFaceImageDescriptionModel.cjs +21 -2
- package/model-provider/huggingface/HuggingFaceImageDescriptionModel.d.ts +11 -6
- package/model-provider/huggingface/HuggingFaceImageDescriptionModel.js +21 -2
- package/model-provider/lmnt/{LmntSpeechSynthesisModel.cjs → LmntSpeechModel.cjs} +5 -5
- package/model-provider/lmnt/LmntSpeechModel.d.ts +26 -0
- package/model-provider/lmnt/{LmntSpeechSynthesisModel.js → LmntSpeechModel.js} +3 -3
- package/model-provider/lmnt/index.cjs +1 -1
- package/model-provider/lmnt/index.d.ts +1 -1
- package/model-provider/lmnt/index.js +1 -1
- package/model-provider/openai/{OpenAITextGenerationModel.cjs → OpenAICompletionModel.cjs} +17 -17
- package/model-provider/openai/{OpenAITextGenerationModel.d.ts → OpenAICompletionModel.d.ts} +25 -25
- package/model-provider/openai/{OpenAITextGenerationModel.js → OpenAICompletionModel.js} +12 -12
- package/model-provider/openai/OpenAICostCalculator.cjs +3 -3
- package/model-provider/openai/OpenAICostCalculator.js +3 -3
- package/model-provider/openai/OpenAITranscriptionModel.d.ts +1 -1
- package/model-provider/openai/TikTokenTokenizer.d.ts +2 -2
- package/model-provider/openai/index.cjs +1 -1
- package/model-provider/openai/index.d.ts +1 -1
- package/model-provider/openai/index.js +1 -1
- package/package.json +3 -1
- package/ui/MediaSourceAppender.cjs +54 -0
- package/ui/MediaSourceAppender.d.ts +11 -0
- package/ui/MediaSourceAppender.js +50 -0
- package/ui/index.cjs +17 -0
- package/ui/index.d.ts +1 -0
- package/ui/index.js +1 -0
- package/util/SimpleWebSocket.cjs +41 -0
- package/util/SimpleWebSocket.d.ts +12 -0
- package/util/SimpleWebSocket.js +14 -0
- package/model-function/describe-image/ImageDescriptionEvent.d.ts +0 -18
- package/model-function/describe-image/ImageDescriptionModel.d.ts +0 -10
- package/model-function/describe-image/describeImage.cjs +0 -26
- package/model-function/describe-image/describeImage.d.ts +0 -9
- package/model-function/describe-image/describeImage.js +0 -22
- package/model-function/generate-text/TextStreamingEvent.cjs +0 -2
- package/model-function/generate-text/TextStreamingEvent.d.ts +0 -7
- package/model-function/generate-text/TextStreamingEvent.js +0 -1
- package/model-function/synthesize-speech/SpeechSynthesisEvent.cjs +0 -2
- package/model-function/synthesize-speech/SpeechSynthesisEvent.d.ts +0 -21
- package/model-function/synthesize-speech/SpeechSynthesisEvent.js +0 -1
- package/model-function/synthesize-speech/SpeechSynthesisModel.cjs +0 -2
- package/model-function/synthesize-speech/SpeechSynthesisModel.d.ts +0 -11
- package/model-function/synthesize-speech/SpeechSynthesisModel.js +0 -1
- package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.cjs +0 -79
- package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.d.ts +0 -30
- package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.js +0 -75
- package/model-provider/lmnt/LmntSpeechSynthesisModel.d.ts +0 -26
- /package/model-function/{describe-image/ImageDescriptionEvent.cjs → generate-speech/SpeechGenerationEvent.cjs} +0 -0
- /package/model-function/{describe-image/ImageDescriptionEvent.js → generate-speech/SpeechGenerationEvent.js} +0 -0
- /package/model-function/{describe-image/ImageDescriptionModel.cjs → generate-speech/SpeechGenerationModel.cjs} +0 -0
- /package/model-function/{describe-image/ImageDescriptionModel.js → generate-speech/SpeechGenerationModel.js} +0 -0
- /package/model-function/{transcribe-speech → generate-transcription}/TranscriptionEvent.cjs +0 -0
- /package/model-function/{transcribe-speech → generate-transcription}/TranscriptionEvent.d.ts +0 -0
- /package/model-function/{transcribe-speech → generate-transcription}/TranscriptionEvent.js +0 -0
- /package/model-function/{transcribe-speech → generate-transcription}/TranscriptionModel.cjs +0 -0
- /package/model-function/{transcribe-speech → generate-transcription}/TranscriptionModel.d.ts +0 -0
- /package/model-function/{transcribe-speech → generate-transcription}/TranscriptionModel.js +0 -0
package/model-function/generate-text/streamText.js

```diff
@@ -1,133 +1,26 @@
-import { nanoid as createId } from "nanoid";
-import { FunctionEventSource } from "../../core/FunctionEventSource.js";
-import { getGlobalFunctionLogging } from "../../core/GlobalFunctionLogging.js";
-import { getGlobalFunctionObservers } from "../../core/GlobalFunctionObservers.js";
-import { AbortError } from "../../core/api/AbortError.js";
-import { getFunctionCallLogger } from "../../core/getFunctionCallLogger.js";
-import { getRun } from "../../core/getRun.js";
-import { startDurationMeasurement } from "../../util/DurationMeasurement.js";
-import { runSafe } from "../../util/runSafe.js";
 import { AsyncIterableResultPromise } from "../AsyncIterableResultPromise.js";
+import { executeStreamCall } from "../executeStreamCall.js";
 export function streamText(model, prompt, options) {
-
-
-
-    const run = await getRun(options?.run);
-    const eventSource = new FunctionEventSource({
-        observers: [
-            ...getFunctionCallLogger(options?.logging ?? getGlobalFunctionLogging()),
-            ...getGlobalFunctionObservers(),
-            ...(model.settings.observers ?? []),
-            ...(run?.functionObserver != null ? [run.functionObserver] : []),
-            ...(options?.observers ?? []),
-        ],
-        errorHandler: run?.errorHandler,
-    });
-    const durationMeasurement = startDurationMeasurement();
-    const startMetadata = {
+    let accumulatedText = "";
+    let lastFullDelta;
+    return new AsyncIterableResultPromise(executeStreamCall({
         functionType: "text-streaming",
-        callId: `call-${createId()}`,
-        runId: run?.runId,
-        sessionId: run?.sessionId,
-        userId: run?.userId,
-        functionId: options?.functionId,
-        model: model.modelInformation,
-        settings: model.settingsForEvent,
         input: prompt,
-
-
-
-
-
-
-
-
-
-            functionId: options?.functionId,
-            logging: options?.logging,
-            observers: options?.observers,
-            run,
-        });
-        return (async function* () {
-            let accumulatedText = "";
-            let lastFullDelta;
-            for await (const event of deltaIterable) {
-                if (event?.type === "error") {
-                    const error = event.error;
-                    const finishMetadata = {
-                        eventType: "finished",
-                        ...startMetadata,
-                        finishTimestamp: new Date(),
-                        durationInMs: durationMeasurement.durationInMs,
-                    };
-                    eventSource.notify(error instanceof AbortError
-                        ? {
-                            ...finishMetadata,
-                            result: {
-                                status: "abort",
-                            },
-                        }
-                        : {
-                            ...finishMetadata,
-                            result: {
-                                status: "error",
-                                error,
-                            },
-                        });
-                    throw error;
-                }
-                if (event?.type === "delta") {
-                    lastFullDelta = event.fullDelta;
-                    const textDelta = event.valueDelta;
-                    if (textDelta != null && textDelta.length > 0) {
-                        accumulatedText += textDelta;
-                        yield textDelta;
-                    }
-                }
+        model,
+        options,
+        startStream: async (options) => model.doStreamText(prompt, options),
+        processDelta: (delta) => {
+            lastFullDelta = delta.fullDelta;
+            const textDelta = delta.valueDelta;
+            if (textDelta != null && textDelta.length > 0) {
+                accumulatedText += textDelta;
+                return textDelta;
             }
-
-
-
-
-
-
-
-                ...finishMetadata,
-                result: {
-                    status: "success",
-                    response: lastFullDelta,
-                    value: accumulatedText,
-                },
-            });
-        })();
-    });
-    if (!result.ok) {
-        const finishMetadata = {
-            eventType: "finished",
-            ...startMetadata,
-            finishTimestamp: new Date(),
-            durationInMs: durationMeasurement.durationInMs,
-        };
-        if (result.isAborted) {
-            eventSource.notify({
-                ...finishMetadata,
-                result: {
-                    status: "abort",
-                },
-            });
-            throw new AbortError();
-        }
-        eventSource.notify({
-            ...finishMetadata,
-            result: {
-                status: "error",
-                error: result.error,
-            },
-        });
-        throw result.error;
-    }
-    return {
-        output: result.value,
-        metadata: startMetadata,
-    };
+            return undefined;
+        },
+        getResult: () => ({
+            response: lastFullDelta,
+            value: accumulatedText,
+        }),
+    }));
 }
```
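The hunk above replaces the hand-rolled event source, duration measurement, and abort/error bookkeeping in `streamText` with the shared `executeStreamCall` helper; `streamText` now only supplies the `startStream`, `processDelta`, and `getResult` callbacks. The public call pattern is unchanged. A minimal usage sketch (the model name and prompt are illustrative, and it assumes the returned `AsyncIterableResultPromise` resolves to the delta iterable, as in the package README):

```ts
import { OpenAICompletionModel, streamText } from "modelfusion";

// streamText returns an AsyncIterableResultPromise; awaiting it yields an
// AsyncIterable of the text deltas produced by processDelta above.
const textStream = await streamText(
  new OpenAICompletionModel({ model: "gpt-3.5-turbo-instruct" }),
  "Write a short story about a robot learning to cook:"
);

for await (const textDelta of textStream) {
  process.stdout.write(textDelta);
}
```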
package/model-function/generate-text/trimChatPrompt.cjs

```diff
@@ -10,7 +10,7 @@ const validateChatPrompt_js_1 = require("./validateChatPrompt.cjs");
  * When the minimal chat prompt (system message + last user message) is already too long, it will only
  * return this minimal chat prompt.
  *
- * @see https://modelfusion.dev/guide/function/generate-text
+ * @see https://modelfusion.dev/guide/function/generate-text#limiting-the-chat-length
  */
 async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
     (model.settings.maxCompletionTokens ?? model.contextWindowSize / 4), }) {
```
package/model-function/generate-text/trimChatPrompt.d.ts

```diff
@@ -8,7 +8,7 @@ import { HasContextWindowSize, HasTokenizer, TextGenerationModel, TextGeneration
  * When the minimal chat prompt (system message + last user message) is already too long, it will only
  * return this minimal chat prompt.
  *
- * @see https://modelfusion.dev/guide/function/generate-text
+ * @see https://modelfusion.dev/guide/function/generate-text#limiting-the-chat-length
  */
 export declare function trimChatPrompt({ prompt, model, tokenLimit, }: {
     prompt: ChatPrompt;
```
package/model-function/generate-text/trimChatPrompt.js

```diff
@@ -7,7 +7,7 @@ import { validateChatPrompt } from "./validateChatPrompt.js";
  * When the minimal chat prompt (system message + last user message) is already too long, it will only
  * return this minimal chat prompt.
  *
- * @see https://modelfusion.dev/guide/function/generate-text
+ * @see https://modelfusion.dev/guide/function/generate-text#limiting-the-chat-length
  */
 export async function trimChatPrompt({ prompt, model, tokenLimit = model.contextWindowSize -
     (model.settings.maxCompletionTokens ?? model.contextWindowSize / 4), }) {
```
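Only the `@see` link changes in these three files, but the signatures above document the default token budget: `tokenLimit` falls back to `contextWindowSize - (maxCompletionTokens ?? contextWindowSize / 4)`. A minimal sketch of a call that relies on that default (the model and prompt values are placeholders derived from the function's own parameter types, not a concrete setup from this diff):

```ts
import { trimChatPrompt } from "modelfusion";

// Placeholders: a chat-capable text generation model with a tokenizer and a
// known context window, plus an existing chat prompt.
declare const model: Parameters<typeof trimChatPrompt>[0]["model"];
declare const prompt: Parameters<typeof trimChatPrompt>[0]["prompt"];

// Keeps the system message and the most recent messages that fit into
// tokenLimit = contextWindowSize - (maxCompletionTokens ?? contextWindowSize / 4).
const trimmedPrompt = await trimChatPrompt({ prompt, model });
```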
package/model-function/{transcribe-speech/transcribe.cjs → generate-transcription/generateTranscription.cjs}

```diff
@@ -1,7 +1,7 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
-exports.
-const
+exports.generateTranscription = void 0;
+const executeStandardCall_js_1 = require("../executeStandardCall.cjs");
 const ModelFunctionPromise_js_1 = require("../ModelFunctionPromise.cjs");
 /**
  * Transcribe audio data into text.
@@ -9,7 +9,7 @@ const ModelFunctionPromise_js_1 = require("../ModelFunctionPromise.cjs");
  * @example
  * const data = await fs.promises.readFile("data/test.mp3");
  *
- * const transcription = await
+ * const transcription = await generateTranscription(
  *   new OpenAITranscriptionModel({ model: "whisper-1" }),
  *   {
  *     type: "mp3",
@@ -17,8 +17,8 @@ const ModelFunctionPromise_js_1 = require("../ModelFunctionPromise.cjs");
  *   }
  * );
  */
-function
-return new ModelFunctionPromise_js_1.ModelFunctionPromise((0,
+function generateTranscription(model, data, options) {
+    return new ModelFunctionPromise_js_1.ModelFunctionPromise((0, executeStandardCall_js_1.executeStandardCall)({
         functionType: "transcription",
         input: data,
         model,
@@ -32,4 +32,4 @@ function transcribe(model, data, options) {
         },
     }));
 }
-exports.
+exports.generateTranscription = generateTranscription;
```
package/model-function/{transcribe-speech/transcribe.d.ts → generate-transcription/generateTranscription.d.ts}

```diff
@@ -7,7 +7,7 @@ import { TranscriptionModel, TranscriptionModelSettings } from "./TranscriptionM
  * @example
  * const data = await fs.promises.readFile("data/test.mp3");
  *
- * const transcription = await
+ * const transcription = await generateTranscription(
  *   new OpenAITranscriptionModel({ model: "whisper-1" }),
  *   {
  *     type: "mp3",
@@ -15,4 +15,4 @@ import { TranscriptionModel, TranscriptionModelSettings } from "./TranscriptionM
  *   }
  * );
  */
-export declare function
+export declare function generateTranscription<DATA>(model: TranscriptionModel<DATA, TranscriptionModelSettings>, data: DATA, options?: FunctionOptions): ModelFunctionPromise<string>;
```
package/model-function/{transcribe-speech/transcribe.js → generate-transcription/generateTranscription.js}

```diff
@@ -1,4 +1,4 @@
-import {
+import { executeStandardCall } from "../executeStandardCall.js";
 import { ModelFunctionPromise } from "../ModelFunctionPromise.js";
 /**
  * Transcribe audio data into text.
@@ -6,7 +6,7 @@ import { ModelFunctionPromise } from "../ModelFunctionPromise.js";
  * @example
  * const data = await fs.promises.readFile("data/test.mp3");
  *
- * const transcription = await
+ * const transcription = await generateTranscription(
  *   new OpenAITranscriptionModel({ model: "whisper-1" }),
  *   {
  *     type: "mp3",
@@ -14,8 +14,8 @@ import { ModelFunctionPromise } from "../ModelFunctionPromise.js";
  *   }
  * );
  */
-export function
-return new ModelFunctionPromise(
+export function generateTranscription(model, data, options) {
+    return new ModelFunctionPromise(executeStandardCall({
         functionType: "transcription",
         input: data,
         model,
```
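These hunks rename `transcribe` to `generateTranscription` and route it through the renamed `executeStandardCall` helper. Assembling the JSDoc example from the diff into a self-contained sketch (the audio file path is illustrative, and the `data` field in the second argument is assumed to carry the audio buffer alongside the `type` shown in the JSDoc):

```ts
import fs from "node:fs";
import { OpenAITranscriptionModel, generateTranscription } from "modelfusion";

// Read an MP3 file and transcribe it with OpenAI Whisper.
const data = await fs.promises.readFile("data/test.mp3");

const transcription = await generateTranscription(
  new OpenAITranscriptionModel({ model: "whisper-1" }),
  { type: "mp3", data }
);

console.log(transcription); // resolves to the transcribed text (ModelFunctionPromise<string>)
```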
package/model-function/index.cjs
CHANGED
```diff
@@ -21,9 +21,6 @@ __exportStar(require("./ModelCallMetadata.cjs"), exports);
 __exportStar(require("./ModelInformation.cjs"), exports);
 __exportStar(require("./PromptFormat.cjs"), exports);
 __exportStar(require("./SuccessfulModelCall.cjs"), exports);
-__exportStar(require("./describe-image/ImageDescriptionEvent.cjs"), exports);
-__exportStar(require("./describe-image/ImageDescriptionModel.cjs"), exports);
-__exportStar(require("./describe-image/describeImage.cjs"), exports);
 __exportStar(require("./embed/EmbeddingEvent.cjs"), exports);
 __exportStar(require("./embed/EmbeddingModel.cjs"), exports);
 __exportStar(require("./embed/embed.cjs"), exports);
@@ -31,23 +28,11 @@ __exportStar(require("./generate-image/ImageGenerationEvent.cjs"), exports);
 __exportStar(require("./generate-image/ImageGenerationModel.cjs"), exports);
 __exportStar(require("./generate-image/PromptFormatImageGenerationModel.cjs"), exports);
 __exportStar(require("./generate-image/generateImage.cjs"), exports);
-__exportStar(require("./generate-
-__exportStar(require("./generate-structure/
-__exportStar(require("./generate-structure/StructureGenerationEvent.cjs"), exports);
-__exportStar(require("./generate-structure/StructureGenerationModel.cjs"), exports);
-__exportStar(require("./generate-structure/StructureOrTextGenerationModel.cjs"), exports);
-__exportStar(require("./generate-structure/StructureParseError.cjs"), exports);
-__exportStar(require("./generate-structure/StructureStreamingEvent.cjs"), exports);
-__exportStar(require("./generate-structure/StructureValidationError.cjs"), exports);
-__exportStar(require("./generate-structure/generateStructure.cjs"), exports);
-__exportStar(require("./generate-structure/generateStructureOrText.cjs"), exports);
-__exportStar(require("./generate-structure/streamStructure.cjs"), exports);
+__exportStar(require("./generate-speech/index.cjs"), exports);
+__exportStar(require("./generate-structure/index.cjs"), exports);
 __exportStar(require("./generate-text/index.cjs"), exports);
-__exportStar(require("./
-__exportStar(require("./
-__exportStar(require("./
+__exportStar(require("./generate-transcription/TranscriptionEvent.cjs"), exports);
+__exportStar(require("./generate-transcription/TranscriptionModel.cjs"), exports);
+__exportStar(require("./generate-transcription/generateTranscription.cjs"), exports);
 __exportStar(require("./tokenize-text/Tokenizer.cjs"), exports);
 __exportStar(require("./tokenize-text/countTokens.cjs"), exports);
-__exportStar(require("./transcribe-speech/TranscriptionEvent.cjs"), exports);
-__exportStar(require("./transcribe-speech/TranscriptionModel.cjs"), exports);
-__exportStar(require("./transcribe-speech/transcribe.cjs"), exports);
```
package/model-function/index.d.ts

```diff
@@ -5,9 +5,6 @@ export * from "./ModelCallMetadata.js";
 export * from "./ModelInformation.js";
 export * from "./PromptFormat.js";
 export * from "./SuccessfulModelCall.js";
-export * from "./describe-image/ImageDescriptionEvent.js";
-export * from "./describe-image/ImageDescriptionModel.js";
-export * from "./describe-image/describeImage.js";
 export * from "./embed/EmbeddingEvent.js";
 export * from "./embed/EmbeddingModel.js";
 export * from "./embed/embed.js";
@@ -15,23 +12,11 @@ export * from "./generate-image/ImageGenerationEvent.js";
 export * from "./generate-image/ImageGenerationModel.js";
 export * from "./generate-image/PromptFormatImageGenerationModel.js";
 export * from "./generate-image/generateImage.js";
-export * from "./generate-
-export * from "./generate-structure/
-export * from "./generate-structure/StructureGenerationEvent.js";
-export * from "./generate-structure/StructureGenerationModel.js";
-export * from "./generate-structure/StructureOrTextGenerationModel.js";
-export * from "./generate-structure/StructureParseError.js";
-export * from "./generate-structure/StructureStreamingEvent.js";
-export * from "./generate-structure/StructureValidationError.js";
-export * from "./generate-structure/generateStructure.js";
-export * from "./generate-structure/generateStructureOrText.js";
-export * from "./generate-structure/streamStructure.js";
+export * from "./generate-speech/index.js";
+export * from "./generate-structure/index.js";
 export * from "./generate-text/index.js";
-export * from "./
-export * from "./
-export * from "./
+export * from "./generate-transcription/TranscriptionEvent.js";
+export * from "./generate-transcription/TranscriptionModel.js";
+export * from "./generate-transcription/generateTranscription.js";
 export * from "./tokenize-text/Tokenizer.js";
 export * from "./tokenize-text/countTokens.js";
-export * from "./transcribe-speech/TranscriptionEvent.js";
-export * from "./transcribe-speech/TranscriptionModel.js";
-export * from "./transcribe-speech/transcribe.js";
```
package/model-function/index.js
CHANGED
```diff
@@ -5,9 +5,6 @@ export * from "./ModelCallMetadata.js";
 export * from "./ModelInformation.js";
 export * from "./PromptFormat.js";
 export * from "./SuccessfulModelCall.js";
-export * from "./describe-image/ImageDescriptionEvent.js";
-export * from "./describe-image/ImageDescriptionModel.js";
-export * from "./describe-image/describeImage.js";
 export * from "./embed/EmbeddingEvent.js";
 export * from "./embed/EmbeddingModel.js";
 export * from "./embed/embed.js";
@@ -15,23 +12,11 @@ export * from "./generate-image/ImageGenerationEvent.js";
 export * from "./generate-image/ImageGenerationModel.js";
 export * from "./generate-image/PromptFormatImageGenerationModel.js";
 export * from "./generate-image/generateImage.js";
-export * from "./generate-
-export * from "./generate-structure/
-export * from "./generate-structure/StructureGenerationEvent.js";
-export * from "./generate-structure/StructureGenerationModel.js";
-export * from "./generate-structure/StructureOrTextGenerationModel.js";
-export * from "./generate-structure/StructureParseError.js";
-export * from "./generate-structure/StructureStreamingEvent.js";
-export * from "./generate-structure/StructureValidationError.js";
-export * from "./generate-structure/generateStructure.js";
-export * from "./generate-structure/generateStructureOrText.js";
-export * from "./generate-structure/streamStructure.js";
+export * from "./generate-speech/index.js";
+export * from "./generate-structure/index.js";
 export * from "./generate-text/index.js";
-export * from "./
-export * from "./
-export * from "./
+export * from "./generate-transcription/TranscriptionEvent.js";
+export * from "./generate-transcription/TranscriptionModel.js";
+export * from "./generate-transcription/generateTranscription.js";
 export * from "./tokenize-text/Tokenizer.js";
 export * from "./tokenize-text/countTokens.js";
-export * from "./transcribe-speech/TranscriptionEvent.js";
-export * from "./transcribe-speech/TranscriptionModel.js";
-export * from "./transcribe-speech/transcribe.js";
```
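With the barrel files updated, the renamed functions are re-exported from the package root under their new names. A small sketch of how consumer imports change (old names taken from the renamed paths in the file list; the exact export sets of the new `generate-speech` and `generate-transcription` directories are assumed):

```ts
// 0.47.3: import { describeImage, synthesizeSpeech, transcribe } from "modelfusion";
// 0.49.0:
import {
  generateSpeech,        // was synthesizeSpeech
  streamSpeech,          // new in this release
  generateTranscription, // was transcribe
} from "modelfusion";
```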
package/model-provider/elevenlabs/ElevenLabsSpeechModel.cjs

```diff
@@ -0,0 +1,191 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.ElevenLabsSpeechModel = void 0;
+const zod_1 = require("zod");
+const callWithRetryAndThrottle_js_1 = require("../../core/api/callWithRetryAndThrottle.cjs");
+const postToApi_js_1 = require("../../core/api/postToApi.cjs");
+const AsyncQueue_js_1 = require("../../event-source/AsyncQueue.cjs");
+const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
+const SimpleWebSocket_js_1 = require("../../util/SimpleWebSocket.cjs");
+const parseJSON_js_1 = require("../../util/parseJSON.cjs");
+const ElevenLabsApiConfiguration_js_1 = require("./ElevenLabsApiConfiguration.cjs");
+const ElevenLabsError_js_1 = require("./ElevenLabsError.cjs");
+const elevenLabsModels = [
+    "eleven_multilingual_v2",
+    "eleven_multilingual_v1",
+    "eleven_monolingual_v1",
+];
+const defaultModel = "eleven_multilingual_v2";
+/**
+ * Synthesize speech using the ElevenLabs Text to Speech API.
+ *
+ * @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
+ */
+class ElevenLabsSpeechModel extends AbstractModel_js_1.AbstractModel {
+    constructor(settings) {
+        super({ settings });
+        Object.defineProperty(this, "provider", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: "elevenlabs"
+        });
+    }
+    get modelName() {
+        return this.settings.voice;
+    }
+    async callAPI(text, options) {
+        return (0, callWithRetryAndThrottle_js_1.callWithRetryAndThrottle)({
+            retry: this.settings.api?.retry,
+            throttle: this.settings.api?.throttle,
+            call: async () => callElevenLabsTextToSpeechAPI({
+                api: this.settings.api,
+                abortSignal: options?.run?.abortSignal,
+                text,
+                voiceId: this.settings.voice,
+                modelId: this.settings.model,
+                voiceSettings: this.settings.voiceSettings,
+            }),
+        });
+    }
+    get settingsForEvent() {
+        return {
+            model: this.settings.model,
+            voice: this.settings.voice,
+            voiceSettings: this.settings.voiceSettings,
+        };
+    }
+    doGenerateSpeechStandard(text, options) {
+        return this.callAPI(text, options);
+    }
+    async doGenerateSpeechStreamDuplex(textStream
+    // options?: FunctionOptions | undefined
+    ) {
+        const responseSchema = zod_1.z.union([
+            zod_1.z.object({
+                audio: zod_1.z.string(),
+                isFinal: zod_1.z.literal(false).nullable(),
+                normalizedAlignment: zod_1.z
+                    .object({
+                    chars: zod_1.z.array(zod_1.z.string()),
+                    charStartTimesMs: zod_1.z.array(zod_1.z.number()),
+                    charDurationsMs: zod_1.z.array(zod_1.z.number()),
+                })
+                    .nullable(),
+            }),
+            zod_1.z.object({
+                isFinal: zod_1.z.literal(true),
+            }),
+            zod_1.z.object({
+                message: zod_1.z.string(),
+                error: zod_1.z.string(),
+                code: zod_1.z.number(),
+            }),
+        ]);
+        const queue = new AsyncQueue_js_1.AsyncQueue();
+        const model = this.settings.model ?? defaultModel;
+        const socket = await (0, SimpleWebSocket_js_1.createSimpleWebSocket)(`wss://api.elevenlabs.io/v1/text-to-speech/${this.settings.voice}/stream-input?model_id=${model}`);
+        socket.onopen = async () => {
+            const api = this.settings.api ?? new ElevenLabsApiConfiguration_js_1.ElevenLabsApiConfiguration();
+            // send begin-of-stream (BOS) message:
+            socket.send(JSON.stringify({
+                // The JS WebSocket API does not support authorization headers, so we send the API key in the BOS message.
+                // See https://stackoverflow.com/questions/4361173/http-headers-in-websockets-client-api
+                xi_api_key: api.apiKey,
+                text: " ",
+                voice_settings: toApiVoiceSettings(this.settings.voiceSettings),
+                generation_config: toGenerationConfig(this.settings.generationConfig),
+            }));
+            // send text in chunks:
+            let textBuffer = "";
+            for await (const textDelta of textStream) {
+                textBuffer += textDelta;
+                // using ". " as separator: sending in full sentences improves the quality
+                // of the audio output significantly.
+                const separator = textBuffer.lastIndexOf(". ");
+                if (separator === -1) {
+                    continue;
+                }
+                const textToProcess = textBuffer.slice(0, separator);
+                textBuffer = textBuffer.slice(separator + 1);
+                socket.send(JSON.stringify({
+                    text: textToProcess,
+                    try_trigger_generation: true,
+                }));
+            }
+            // send remaining text:
+            if (textBuffer.length > 0) {
+                socket.send(JSON.stringify({
+                    text: `${textBuffer} `,
+                    try_trigger_generation: true,
+                }));
+            }
+            // send end-of-stream (EOS) message:
+            socket.send(JSON.stringify({ text: "" }));
+        };
+        socket.onmessage = (event) => {
+            const parseResult = (0, parseJSON_js_1.safeParseJsonWithZod)(event.data, responseSchema);
+            if (!parseResult.success) {
+                queue.push({ type: "error", error: parseResult.error });
+                return;
+            }
+            const response = parseResult.data;
+            if ("error" in response) {
+                queue.push({ type: "error", error: response });
+                return;
+            }
+            if (!response.isFinal) {
+                queue.push({
+                    type: "delta",
+                    fullDelta: event,
+                    valueDelta: Buffer.from(response.audio, "base64"),
+                });
+            }
+        };
+        socket.onerror = (error) => {
+            queue.push({ type: "error", error });
+        };
+        socket.onclose = () => {
+            queue.close();
+        };
+        return queue;
+    }
+    withSettings(additionalSettings) {
+        return new ElevenLabsSpeechModel({
+            ...this.settings,
+            ...additionalSettings,
+        });
+    }
+}
+exports.ElevenLabsSpeechModel = ElevenLabsSpeechModel;
+async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfiguration_js_1.ElevenLabsApiConfiguration(), abortSignal, text, voiceId, modelId, voiceSettings, }) {
+    return (0, postToApi_js_1.postJsonToApi)({
+        url: api.assembleUrl(`/text-to-speech/${voiceId}`),
+        headers: api.headers,
+        body: {
+            text,
+            model_id: modelId ?? defaultModel,
+            voice_settings: toApiVoiceSettings(voiceSettings),
+        },
+        failedResponseHandler: ElevenLabsError_js_1.failedElevenLabsCallResponseHandler,
+        successfulResponseHandler: (0, postToApi_js_1.createAudioMpegResponseHandler)(),
+        abortSignal,
+    });
+}
+function toApiVoiceSettings(voiceSettings) {
+    return voiceSettings != null
+        ? {
+            stability: voiceSettings.stability,
+            similarity_boost: voiceSettings.similarityBoost,
+            style: voiceSettings.style,
+            use_speaker_boost: voiceSettings.useSpeakerBoost,
+        }
+        : undefined;
+}
+function toGenerationConfig(generationConfig) {
+    return generationConfig != null
+        ? {
+            chunk_length_schedule: generationConfig.chunkLengthSchedule,
+        }
+        : undefined;
+}
```
package/model-provider/elevenlabs/ElevenLabsSpeechModel.d.ts

```diff
@@ -0,0 +1,39 @@
+/// <reference types="node" />
+import { FunctionOptions } from "../../core/FunctionOptions.js";
+import { ApiConfiguration } from "../../core/api/ApiConfiguration.js";
+import { AbstractModel } from "../../model-function/AbstractModel.js";
+import { Delta } from "../../model-function/Delta.js";
+import { StreamingSpeechGenerationModel, SpeechGenerationModelSettings } from "../../model-function/generate-speech/SpeechGenerationModel.js";
+declare const elevenLabsModels: readonly ["eleven_multilingual_v2", "eleven_multilingual_v1", "eleven_monolingual_v1"];
+export interface ElevenLabsSpeechModelSettings extends SpeechGenerationModelSettings {
+    api?: ApiConfiguration & {
+        apiKey: string;
+    };
+    voice: string;
+    model?: (typeof elevenLabsModels)[number] | (string & {});
+    voiceSettings?: {
+        stability: number;
+        similarityBoost: number;
+        style?: number;
+        useSpeakerBoost?: boolean;
+    };
+    generationConfig?: {
+        chunkLengthSchedule: number[];
+    };
+}
+/**
+ * Synthesize speech using the ElevenLabs Text to Speech API.
+ *
+ * @see https://api.elevenlabs.io/docs#/text-to-speech/Text_to_speech_v1_text_to_speech__voice_id__post
+ */
+export declare class ElevenLabsSpeechModel extends AbstractModel<ElevenLabsSpeechModelSettings> implements StreamingSpeechGenerationModel<ElevenLabsSpeechModelSettings> {
+    constructor(settings: ElevenLabsSpeechModelSettings);
+    readonly provider = "elevenlabs";
+    get modelName(): string;
+    private callAPI;
+    get settingsForEvent(): Partial<ElevenLabsSpeechModelSettings>;
+    doGenerateSpeechStandard(text: string, options?: FunctionOptions): Promise<Buffer>;
+    doGenerateSpeechStreamDuplex(textStream: AsyncIterable<string>): Promise<AsyncIterable<Delta<Buffer>>>;
+    withSettings(additionalSettings: Partial<ElevenLabsSpeechModelSettings>): this;
+}
+export {};
```
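The new `ElevenLabsSpeechModel` implements both `doGenerateSpeechStandard` (a single POST per text) and `doGenerateSpeechStreamDuplex` (the WebSocket flow above: a BOS message carrying the API key, sentence-sized text chunks, then an EOS message), so it can be driven by the new `streamSpeech` function. A hedged end-to-end sketch, assuming `streamSpeech(model, textStream)` accepts an `AsyncIterable<string>` and resolves to an `AsyncIterable<Buffer>` of audio chunks (voice ID, model names, and prompt are illustrative):

```ts
import {
  ElevenLabsSpeechModel,
  OpenAICompletionModel,
  streamSpeech,
  streamText,
} from "modelfusion";

// Stream text from a completion model...
const textStream = await streamText(
  new OpenAICompletionModel({ model: "gpt-3.5-turbo-instruct" }),
  "Tell a story about a robot learning to cook:"
);

// ...and synthesize speech for it while the text is still being generated.
const speechStream = await streamSpeech(
  new ElevenLabsSpeechModel({
    voice: "pNInz6obpgDQGcFmaJgB", // example voice ID
    generationConfig: { chunkLengthSchedule: [50, 90, 120, 150, 200] },
  }),
  textStream
);

for await (const audioChunk of speechStream) {
  // audioChunk is a Buffer of MP3 data; it could be appended to a player,
  // e.g. via the new MediaSourceAppender in package/ui, or written to a file.
}
```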