modelfusion 0.47.2 → 0.48.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -3
- package/core/api/loadApiKey.cjs +7 -1
- package/core/api/loadApiKey.js +7 -1
- package/core/getRun.cjs +5 -3
- package/core/getRun.js +5 -3
- package/index.cjs +1 -0
- package/index.d.ts +1 -0
- package/index.js +1 -0
- package/model-function/synthesize-speech/SpeechSynthesisModel.d.ts +5 -1
- package/model-function/synthesize-speech/synthesizeSpeech.cjs +60 -17
- package/model-function/synthesize-speech/synthesizeSpeech.d.ts +8 -2
- package/model-function/synthesize-speech/synthesizeSpeech.js +59 -16
- package/model-provider/elevenlabs/ElevenLabsApiConfiguration.cjs +3 -0
- package/model-provider/elevenlabs/ElevenLabsApiConfiguration.d.ts +1 -0
- package/model-provider/elevenlabs/ElevenLabsApiConfiguration.js +3 -0
- package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.cjs +122 -10
- package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.d.ts +12 -3
- package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.js +122 -10
- package/model-provider/lmnt/LmntSpeechSynthesisModel.cjs +1 -1
- package/model-provider/lmnt/LmntSpeechSynthesisModel.d.ts +1 -1
- package/model-provider/lmnt/LmntSpeechSynthesisModel.js +1 -1
- package/model-provider/openai/TikTokenTokenizer.cjs +12 -8
- package/model-provider/openai/TikTokenTokenizer.d.ts +0 -3
- package/model-provider/openai/TikTokenTokenizer.js +9 -8
- package/package.json +3 -1
- package/ui/MediaSourceAppender.cjs +54 -0
- package/ui/MediaSourceAppender.d.ts +11 -0
- package/ui/MediaSourceAppender.js +50 -0
- package/ui/index.cjs +17 -0
- package/ui/index.d.ts +1 -0
- package/ui/index.js +1 -0
- package/util/SimpleWebSocket.cjs +41 -0
- package/util/SimpleWebSocket.d.ts +12 -0
- package/util/SimpleWebSocket.js +14 -0
package/README.md
CHANGED
@@ -17,7 +17,7 @@
 
 ModelFusion is a library for building AI apps, chatbots, and agents. It provides abstractions for AI models, vector indices, and tools.
 
-- **Multimodal Support**: Beyond just LLMs, ModelFusion encompasses a diverse array of models including text generation, text-to-speech, speech-to-text, and image generation, allowing you to build
+- **Multimodal Support**: Beyond just LLMs, ModelFusion encompasses a diverse array of models including text generation, text-to-speech, speech-to-text, and image generation, allowing you to build multi-modal AI applications with ease.
 - **Flexibility and control**: AI application development can be complex and unique to each project. With ModelFusion, you have complete control over the prompts and model settings, and you can access the raw responses from the models quickly to build what you need.
 - **Type inference and validation**: ModelFusion uses TypeScript to infer types wherever possible and to validate model responses. By default, [Zod](https://github.com/colinhacks/zod) is used for type validation, but you can also use other libraries.
 - **Guards**: ModelFusion provides a guard function that you can use to implement retry on error, redacting and changing reponses, etc.
@@ -274,7 +274,11 @@ Providers: [OpenAI (Whisper)](https://modelfusion.dev/integration/model-provider
 
 ### [Synthesize Speech](https://modelfusion.dev/guide/function/synthesize-speech)
 
-
+Generate speech (audio) from text. Also called TTS (text-to-speech).
+
+Providers: [Eleven Labs](https://modelfusion.dev/integration/model-provider/elevenlabs), [LMNT](https://modelfusion.dev/integration/model-provider/lmnt)
+
+#### Standard mode
 
 ```ts
 // `speech` is a Buffer with MP3 audio data
@@ -289,7 +293,28 @@ const speech = await synthesizeSpeech(
 );
 ```
 
-
+#### Duplex streaming mode
+
+```ts
+const textStream = await streamText(/* ... */);
+
+const speechStream = await synthesizeSpeech(
+  new ElevenLabsSpeechSynthesisModel({
+    voice: "pNInz6obpgDQGcFmaJgB", // Adam
+    model: "eleven_monolingual_v1",
+    voiceSettings: { stability: 1, similarityBoost: 0.35 },
+    generationConfig: {
+      chunkLengthSchedule: [50, 90, 120, 150, 200],
+    },
+  }),
+  textStream,
+  { mode: "stream-duplex" }
+);
+
+for await (const part of speechStream) {
+  // each part is a Buffer with MP3 audio data
+}
+```
 
 ### [Describe Image](https://modelfusion.dev/guide/function/describe-image)
 
@@ -603,6 +628,12 @@ Create an 19th century painting image for your input.
 
 Record audio with push-to-talk and transcribe it using Whisper, implemented as a Next.js app. The app shows a list of the transcriptions.
 
+### [Duplex Speech Streaming (Vite(React) + Fastify))](https://github.com/lgrammel/modelfusion/tree/main/examples/duplex-speech-streaming-vite-react-fastify)
+
+> _Speech Streaming_, _OpenAI_, _Elevenlabs_ _streaming_, _Vite_, _Fastify_
+
+Given a prompt, the server returns both a text and a speech stream response.
+
 ### [BabyAGI Agent](https://github.com/lgrammel/modelfusion/tree/main/examples/babyagi-agent)
 
 > _terminal app_, _agent_, _BabyAGI_
@@ -627,6 +658,12 @@ Small agent that solves middle school math problems. It uses a calculator tool t
 
 Extracts information about a topic from a PDF and writes a tweet in your own style about it.
 
+### [Cloudflare Workers](https://github.com/lgrammel/modelfusion/tree/main/examples/cloudflare-workers)
+
+> _Cloudflare_, _OpenAI_
+
+Generate text on a Cloudflare Worker using ModelFusion and OpenAI.
+
 ## Contributing
 
 ### [Contributing Guide](https://github.com/lgrammel/modelfusion/blob/main/CONTRIBUTING.md)
package/core/api/loadApiKey.cjs
CHANGED
@@ -2,7 +2,13 @@
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.loadApiKey = void 0;
 function loadApiKey({ apiKey, environmentVariableName, apiKeyParameterName = "apiKey", description, }) {
-    apiKey
+    if (apiKey != null) {
+        return apiKey;
+    }
+    if (typeof process === "undefined") {
+        throw new Error(`${description} API key is missing. Pass it using the '${apiKeyParameterName}' parameter. Environment variables is not supported in this environment.`);
+    }
+    apiKey = process.env[environmentVariableName];
     if (apiKey == null) {
         throw new Error(`${description} API key is missing. Pass it using the '${apiKeyParameterName}' parameter or set it as an environment variable named ${environmentVariableName}.`);
     }
package/core/api/loadApiKey.js
CHANGED
@@ -1,5 +1,11 @@
 export function loadApiKey({ apiKey, environmentVariableName, apiKeyParameterName = "apiKey", description, }) {
-    apiKey
+    if (apiKey != null) {
+        return apiKey;
+    }
+    if (typeof process === "undefined") {
+        throw new Error(`${description} API key is missing. Pass it using the '${apiKeyParameterName}' parameter. Environment variables is not supported in this environment.`);
+    }
+    apiKey = process.env[environmentVariableName];
     if (apiKey == null) {
         throw new Error(`${description} API key is missing. Pass it using the '${apiKeyParameterName}' parameter or set it as an environment variable named ${environmentVariableName}.`);
     }
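The reworked helper returns an explicitly passed key immediately and only reads `process.env` when a `process` global exists, so it no longer assumes a Node.js environment. A minimal usage sketch follows; the import path and the provider-specific names are illustrative assumptions, not part of this diff:

```ts
import { loadApiKey } from "modelfusion"; // assumption: re-exported from the package root

// Hypothetical provider setup: an explicit key wins; otherwise the
// MYPROVIDER_API_KEY environment variable is used (Node.js only).
const apiKey = loadApiKey({
  apiKey: undefined,
  environmentVariableName: "MYPROVIDER_API_KEY",
  apiKeyParameterName: "apiKey",
  description: "MyProvider",
});
```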
package/core/getRun.cjs
CHANGED
@@ -25,10 +25,12 @@ var __importStar = (this && this.__importStar) || function (mod) {
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.withRun = exports.getRun = void 0;
 let runStorage;
-const isNode = typeof process !== "undefined" &&
-    process.versions != null &&
-    process.versions.node != null;
 async function ensureLoaded() {
+    // Note: using process[versions] instead of process.versions to avoid Next.js edge runtime warnings.
+    const versions = "versions";
+    const isNode = typeof process !== "undefined" &&
+        process[versions] != null &&
+        process[versions].node != null;
     if (!isNode)
         return Promise.resolve();
     if (!runStorage) {
package/core/getRun.js
CHANGED
@@ -1,8 +1,10 @@
 let runStorage;
-const isNode = typeof process !== "undefined" &&
-    process.versions != null &&
-    process.versions.node != null;
 async function ensureLoaded() {
+    // Note: using process[versions] instead of process.versions to avoid Next.js edge runtime warnings.
+    const versions = "versions";
+    const isNode = typeof process !== "undefined" &&
+        process[versions] != null &&
+        process[versions].node != null;
     if (!isNode)
         return Promise.resolve();
     if (!runStorage) {
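The indirection through a `versions` constant keeps static analysis (such as the Next.js edge runtime checks mentioned in the code comment) from flagging a direct `process.versions` property access while preserving the same runtime behavior. A standalone sketch of the pattern, independent of this package:

```ts
// Illustrative only: detect Node.js without a statically analyzable
// `process.versions` property access.
function isNodeRuntime(): boolean {
  const versions = "versions";
  return (
    typeof process !== "undefined" &&
    (process as Record<string, any>)[versions] != null &&
    (process as Record<string, any>)[versions].node != null
  );
}
```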
package/index.cjs
CHANGED
@@ -25,5 +25,6 @@ __exportStar(require("./observability/index.cjs"), exports);
 __exportStar(require("./retriever/index.cjs"), exports);
 __exportStar(require("./text-chunk/index.cjs"), exports);
 __exportStar(require("./tool/index.cjs"), exports);
+__exportStar(require("./ui/index.cjs"), exports);
 __exportStar(require("./util/index.cjs"), exports);
 __exportStar(require("./vector-index/index.cjs"), exports);
package/index.d.ts
CHANGED
@@ -9,5 +9,6 @@ export * from "./observability/index.js";
 export * from "./retriever/index.js";
 export * from "./text-chunk/index.js";
 export * from "./tool/index.js";
+export * from "./ui/index.js";
 export * from "./util/index.js";
 export * from "./vector-index/index.js";
package/index.js
CHANGED
@@ -9,5 +9,6 @@ export * from "./observability/index.js";
 export * from "./retriever/index.js";
 export * from "./text-chunk/index.js";
 export * from "./tool/index.js";
+export * from "./ui/index.js";
 export * from "./util/index.js";
 export * from "./vector-index/index.js";
package/model-function/synthesize-speech/SpeechSynthesisModel.d.ts
CHANGED
@@ -1,5 +1,6 @@
 /// <reference types="node" />
 import { FunctionOptions } from "../../core/FunctionOptions.js";
+import { Delta } from "../../model-function/Delta.js";
 import { Model, ModelSettings } from "../Model.js";
 export interface SpeechSynthesisModelSettings extends ModelSettings {
 }
@@ -7,5 +8,8 @@ export interface SpeechSynthesisModel<SETTINGS extends SpeechSynthesisModelSetti
     /**
      * Generates an mp3 audio buffer that contains the speech for the given text.
      */
-
+    doSynthesizeSpeechStandard(text: string, options?: FunctionOptions): PromiseLike<Buffer>;
+}
+export interface DuplexSpeechSynthesisModel<SETTINGS extends SpeechSynthesisModelSettings = SpeechSynthesisModelSettings> extends SpeechSynthesisModel<SETTINGS> {
+    doSynthesizeSpeechStreamDuplex(textStream: AsyncIterable<string>, options?: FunctionOptions): PromiseLike<AsyncIterable<Delta<Buffer>>>;
 }
package/model-function/synthesize-speech/synthesizeSpeech.cjs
CHANGED
@@ -1,24 +1,67 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.synthesizeSpeech = void 0;
-const
+const AsyncIterableResultPromise_js_1 = require("../../model-function/AsyncIterableResultPromise.cjs");
 const ModelFunctionPromise_js_1 = require("../ModelFunctionPromise.cjs");
-
- * Synthesizes speech from text.
- */
+const executeCall_js_1 = require("../executeCall.cjs");
 function synthesizeSpeech(model, text, options) {
-
-
-
-
-
-
-
-
-
-
-
-
-
+    const mode = options?.mode ?? "standard";
+    switch (mode) {
+        case "standard": {
+            if (typeof text !== "string") {
+                throw new Error(`The "standard" mode only supports a string input, but received ${text}`);
+            }
+            return new ModelFunctionPromise_js_1.ModelFunctionPromise((0, executeCall_js_1.executeCall)({
+                functionType: "speech-synthesis",
+                input: text,
+                model,
+                options,
+                generateResponse: async (options) => {
+                    const response = await model.doSynthesizeSpeechStandard(text, options);
+                    return {
+                        response,
+                        extractedValue: response,
+                    };
+                },
+            }));
+        }
+        case "stream-duplex": {
+            if (typeof text === "string") {
+                throw new Error(`The "stream-duplex" mode only supports an AsyncIterable<string> input, but received ${text}`);
+            }
+            if (!("doSynthesizeSpeechStreamDuplex" in model) ||
+                typeof model.doSynthesizeSpeechStreamDuplex !== "function") {
+                throw new Error(`The "stream-duplex" mode is not supported by this model.`);
+            }
+            return new AsyncIterableResultPromise_js_1.AsyncIterableResultPromise(doSynthesizeSpeechStreamDuplex(model, text, options));
+        }
+        default: {
+            const mode_ = mode;
+            throw new Error(`Unsupported mode: ${mode_}`);
+        }
+    }
 }
 exports.synthesizeSpeech = synthesizeSpeech;
+async function doSynthesizeSpeechStreamDuplex(model, text, options) {
+    const speechDeltas = await model.doSynthesizeSpeechStreamDuplex(text, options);
+    // Convert the speechDeltas (AsyncIterable<Delta<Buffer>>) to an AsyncIterable<Buffer>
+    const bufferStream = convertDeltasToBuffers(speechDeltas);
+    return {
+        output: bufferStream,
+        metadata: {
+            model: model.modelInformation,
+            callId: "test",
+            startTimestamp: new Date(),
+        },
+    };
+}
+async function* convertDeltasToBuffers(deltas) {
+    for await (const delta of deltas) {
+        switch (delta.type) {
+            case "error":
+                throw delta.error;
+            case "delta":
+                yield delta.valueDelta;
+        }
+    }
+}
package/model-function/synthesize-speech/synthesizeSpeech.d.ts
CHANGED
@@ -1,8 +1,14 @@
 /// <reference types="node" />
 import { FunctionOptions } from "../../core/FunctionOptions.js";
+import { AsyncIterableResultPromise } from "../../model-function/AsyncIterableResultPromise.js";
 import { ModelFunctionPromise } from "../ModelFunctionPromise.js";
-import { SpeechSynthesisModel, SpeechSynthesisModelSettings } from "./SpeechSynthesisModel.js";
+import { DuplexSpeechSynthesisModel, SpeechSynthesisModel, SpeechSynthesisModelSettings } from "./SpeechSynthesisModel.js";
 /**
  * Synthesizes speech from text.
  */
-export declare function synthesizeSpeech(model: SpeechSynthesisModel<SpeechSynthesisModelSettings>, text: string, options?: FunctionOptions
+export declare function synthesizeSpeech(model: SpeechSynthesisModel<SpeechSynthesisModelSettings>, text: string, options?: FunctionOptions & {
+    mode?: "standard";
+}): ModelFunctionPromise<Buffer>;
+export declare function synthesizeSpeech(model: DuplexSpeechSynthesisModel<SpeechSynthesisModelSettings>, text: AsyncIterable<string>, options: FunctionOptions & {
+    mode: "stream-duplex";
+}): AsyncIterableResultPromise<Buffer>;
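Taken together, the two overloads tie the return type to the input: a plain string resolves to a single `Buffer`, while an `AsyncIterable<string>` combined with `mode: "stream-duplex"` resolves to an async iterable of `Buffer` chunks. A small sketch of the call sites; the `model`, `duplexModel`, and `textStream` values are placeholders, not part of this diff:

```ts
// Standard mode: resolves to a single Buffer with the complete audio.
const speech = await synthesizeSpeech(model, "Hello world, this is a test!");

// Duplex streaming mode: resolves to an AsyncIterable<Buffer> of audio chunks.
const speechChunks = await synthesizeSpeech(duplexModel, textStream, {
  mode: "stream-duplex",
});

for await (const chunk of speechChunks) {
  // each chunk is a Buffer with partial audio data
}
```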
package/model-function/synthesize-speech/synthesizeSpeech.js
CHANGED
@@ -1,20 +1,63 @@
-import {
+import { AsyncIterableResultPromise } from "../../model-function/AsyncIterableResultPromise.js";
 import { ModelFunctionPromise } from "../ModelFunctionPromise.js";
-
- * Synthesizes speech from text.
- */
+import { executeCall } from "../executeCall.js";
 export function synthesizeSpeech(model, text, options) {
-
-
-
-
-
-
-
-
-
-
-
+    const mode = options?.mode ?? "standard";
+    switch (mode) {
+        case "standard": {
+            if (typeof text !== "string") {
+                throw new Error(`The "standard" mode only supports a string input, but received ${text}`);
+            }
+            return new ModelFunctionPromise(executeCall({
+                functionType: "speech-synthesis",
+                input: text,
+                model,
+                options,
+                generateResponse: async (options) => {
+                    const response = await model.doSynthesizeSpeechStandard(text, options);
+                    return {
+                        response,
+                        extractedValue: response,
+                    };
+                },
+            }));
+        }
+        case "stream-duplex": {
+            if (typeof text === "string") {
+                throw new Error(`The "stream-duplex" mode only supports an AsyncIterable<string> input, but received ${text}`);
+            }
+            if (!("doSynthesizeSpeechStreamDuplex" in model) ||
+                typeof model.doSynthesizeSpeechStreamDuplex !== "function") {
+                throw new Error(`The "stream-duplex" mode is not supported by this model.`);
+            }
+            return new AsyncIterableResultPromise(doSynthesizeSpeechStreamDuplex(model, text, options));
+        }
+        default: {
+            const mode_ = mode;
+            throw new Error(`Unsupported mode: ${mode_}`);
+        }
+    }
+}
+async function doSynthesizeSpeechStreamDuplex(model, text, options) {
+    const speechDeltas = await model.doSynthesizeSpeechStreamDuplex(text, options);
+    // Convert the speechDeltas (AsyncIterable<Delta<Buffer>>) to an AsyncIterable<Buffer>
+    const bufferStream = convertDeltasToBuffers(speechDeltas);
+    return {
+        output: bufferStream,
+        metadata: {
+            model: model.modelInformation,
+            callId: "test",
+            startTimestamp: new Date(),
         },
-}
+    };
+}
+async function* convertDeltasToBuffers(deltas) {
+    for await (const delta of deltas) {
+        switch (delta.type) {
+            case "error":
+                throw delta.error;
+            case "delta":
+                yield delta.valueDelta;
+        }
+    }
 }
package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.cjs
CHANGED
@@ -1,11 +1,21 @@
 "use strict";
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.ElevenLabsSpeechSynthesisModel = void 0;
+const zod_1 = require("zod");
 const callWithRetryAndThrottle_js_1 = require("../../core/api/callWithRetryAndThrottle.cjs");
 const postToApi_js_1 = require("../../core/api/postToApi.cjs");
+const AsyncQueue_js_1 = require("../../event-source/AsyncQueue.cjs");
 const AbstractModel_js_1 = require("../../model-function/AbstractModel.cjs");
+const SimpleWebSocket_js_1 = require("../../util/SimpleWebSocket.cjs");
+const parseJSON_js_1 = require("../../util/parseJSON.cjs");
 const ElevenLabsApiConfiguration_js_1 = require("./ElevenLabsApiConfiguration.cjs");
 const ElevenLabsError_js_1 = require("./ElevenLabsError.cjs");
+const elevenLabsModels = [
+    "eleven_multilingual_v2",
+    "eleven_multilingual_v1",
+    "eleven_monolingual_v1",
+];
+const defaultModel = "eleven_multilingual_v2";
 /**
  * Synthesize speech using the ElevenLabs Text to Speech API.
  *
@@ -45,9 +55,101 @@ class ElevenLabsSpeechSynthesisModel extends AbstractModel_js_1.AbstractModel {
             voiceSettings: this.settings.voiceSettings,
         };
     }
-
+    doSynthesizeSpeechStandard(text, options) {
         return this.callAPI(text, options);
     }
+    async doSynthesizeSpeechStreamDuplex(textStream
+    // options?: FunctionOptions | undefined
+    ) {
+        const responseSchema = zod_1.z.union([
+            zod_1.z.object({
+                audio: zod_1.z.string(),
+                isFinal: zod_1.z.literal(false).nullable(),
+                normalizedAlignment: zod_1.z
+                    .object({
+                    chars: zod_1.z.array(zod_1.z.string()),
+                    charStartTimesMs: zod_1.z.array(zod_1.z.number()),
+                    charDurationsMs: zod_1.z.array(zod_1.z.number()),
+                })
+                    .nullable(),
+            }),
+            zod_1.z.object({
+                isFinal: zod_1.z.literal(true),
+            }),
+            zod_1.z.object({
+                message: zod_1.z.string(),
+                error: zod_1.z.string(),
+                code: zod_1.z.number(),
+            }),
+        ]);
+        const queue = new AsyncQueue_js_1.AsyncQueue();
+        const model = this.settings.model ?? defaultModel;
+        const socket = await (0, SimpleWebSocket_js_1.createSimpleWebSocket)(`wss://api.elevenlabs.io/v1/text-to-speech/${this.settings.voice}/stream-input?model_id=${model}`);
+        socket.onopen = async () => {
+            const api = this.settings.api ?? new ElevenLabsApiConfiguration_js_1.ElevenLabsApiConfiguration();
+            // send begin-of-stream (BOS) message:
+            socket.send(JSON.stringify({
+                // The JS WebSocket API does not support authorization headers, so we send the API key in the BOS message.
+                // See https://stackoverflow.com/questions/4361173/http-headers-in-websockets-client-api
+                xi_api_key: api.apiKey,
+                text: " ",
+                voice_settings: toApiVoiceSettings(this.settings.voiceSettings),
+                generation_config: toGenerationConfig(this.settings.generationConfig),
+            }));
+            // send text in chunks:
+            let textBuffer = "";
+            for await (const textDelta of textStream) {
+                textBuffer += textDelta;
+                // using ". " as separator: sending in full sentences improves the quality
+                // of the audio output significantly.
+                const separator = textBuffer.lastIndexOf(". ");
+                if (separator === -1) {
+                    continue;
+                }
+                const textToProcess = textBuffer.slice(0, separator);
+                textBuffer = textBuffer.slice(separator + 1);
+                socket.send(JSON.stringify({
+                    text: textToProcess,
+                    try_trigger_generation: true,
+                }));
+            }
+            // send remaining text:
+            if (textBuffer.length > 0) {
+                socket.send(JSON.stringify({
+                    text: `${textBuffer} `,
+                    try_trigger_generation: true,
+                }));
+            }
+            // send end-of-stream (EOS) message:
+            socket.send(JSON.stringify({ text: "" }));
+        };
+        socket.onmessage = (event) => {
+            const parseResult = (0, parseJSON_js_1.safeParseJsonWithZod)(event.data, responseSchema);
+            if (!parseResult.success) {
+                queue.push({ type: "error", error: parseResult.error });
+                return;
+            }
+            const response = parseResult.data;
+            if ("error" in response) {
+                queue.push({ type: "error", error: response });
+                return;
+            }
+            if (!response.isFinal) {
+                queue.push({
+                    type: "delta",
+                    fullDelta: event,
+                    valueDelta: Buffer.from(response.audio, "base64"),
+                });
+            }
+        };
+        socket.onerror = (error) => {
+            queue.push({ type: "error", error });
+        };
+        socket.onclose = () => {
+            queue.close();
+        };
+        return queue;
+    }
     withSettings(additionalSettings) {
         return new ElevenLabsSpeechSynthesisModel({
             ...this.settings,
@@ -62,18 +164,28 @@ async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfigurat
         headers: api.headers,
         body: {
             text,
-            model_id: modelId,
-            voice_settings: voiceSettings
-                ? {
-                    stability: voiceSettings.stability,
-                    similarity_boost: voiceSettings.similarityBoost,
-                    style: voiceSettings.style,
-                    use_speaker_boost: voiceSettings.useSpeakerBoost,
-                }
-                : undefined,
+            model_id: modelId ?? defaultModel,
+            voice_settings: toApiVoiceSettings(voiceSettings),
         },
         failedResponseHandler: ElevenLabsError_js_1.failedElevenLabsCallResponseHandler,
         successfulResponseHandler: (0, postToApi_js_1.createAudioMpegResponseHandler)(),
         abortSignal,
     });
 }
+function toApiVoiceSettings(voiceSettings) {
+    return voiceSettings != null
+        ? {
+            stability: voiceSettings.stability,
+            similarity_boost: voiceSettings.similarityBoost,
+            style: voiceSettings.style,
+            use_speaker_boost: voiceSettings.useSpeakerBoost,
+        }
+        : undefined;
+}
+function toGenerationConfig(generationConfig) {
+    return generationConfig != null
+        ? {
+            chunk_length_schedule: generationConfig.chunkLengthSchedule,
+        }
+        : undefined;
+}
package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.d.ts
CHANGED
@@ -2,17 +2,24 @@
 import { FunctionOptions } from "../../core/FunctionOptions.js";
 import { ApiConfiguration } from "../../core/api/ApiConfiguration.js";
 import { AbstractModel } from "../../model-function/AbstractModel.js";
+import { Delta } from "../../model-function/Delta.js";
 import { SpeechSynthesisModel, SpeechSynthesisModelSettings } from "../../model-function/synthesize-speech/SpeechSynthesisModel.js";
+declare const elevenLabsModels: readonly ["eleven_multilingual_v2", "eleven_multilingual_v1", "eleven_monolingual_v1"];
 export interface ElevenLabsSpeechSynthesisModelSettings extends SpeechSynthesisModelSettings {
-    api?: ApiConfiguration
+    api?: ApiConfiguration & {
+        apiKey: string;
+    };
     voice: string;
-    model?: string;
+    model?: (typeof elevenLabsModels)[number] | (string & {});
     voiceSettings?: {
         stability: number;
         similarityBoost: number;
         style?: number;
         useSpeakerBoost?: boolean;
     };
+    generationConfig?: {
+        chunkLengthSchedule: number[];
+    };
 }
 /**
  * Synthesize speech using the ElevenLabs Text to Speech API.
@@ -25,6 +32,8 @@ export declare class ElevenLabsSpeechSynthesisModel extends AbstractModel<Eleven
     get modelName(): string;
     private callAPI;
     get settingsForEvent(): Partial<ElevenLabsSpeechSynthesisModelSettings>;
-
+    doSynthesizeSpeechStandard(text: string, options?: FunctionOptions): Promise<Buffer>;
+    doSynthesizeSpeechStreamDuplex(textStream: AsyncIterable<string>): Promise<AsyncIterable<Delta<Buffer>>>;
     withSettings(additionalSettings: Partial<ElevenLabsSpeechSynthesisModelSettings>): this;
 }
+export {};
package/model-provider/elevenlabs/ElevenLabsSpeechSynthesisModel.js
CHANGED
@@ -1,8 +1,18 @@
+import { z } from "zod";
 import { callWithRetryAndThrottle } from "../../core/api/callWithRetryAndThrottle.js";
 import { createAudioMpegResponseHandler, postJsonToApi, } from "../../core/api/postToApi.js";
+import { AsyncQueue } from "../../event-source/AsyncQueue.js";
 import { AbstractModel } from "../../model-function/AbstractModel.js";
+import { createSimpleWebSocket } from "../../util/SimpleWebSocket.js";
+import { safeParseJsonWithZod } from "../../util/parseJSON.js";
 import { ElevenLabsApiConfiguration } from "./ElevenLabsApiConfiguration.js";
 import { failedElevenLabsCallResponseHandler } from "./ElevenLabsError.js";
+const elevenLabsModels = [
+    "eleven_multilingual_v2",
+    "eleven_multilingual_v1",
+    "eleven_monolingual_v1",
+];
+const defaultModel = "eleven_multilingual_v2";
 /**
  * Synthesize speech using the ElevenLabs Text to Speech API.
  *
@@ -42,9 +52,101 @@ export class ElevenLabsSpeechSynthesisModel extends AbstractModel {
             voiceSettings: this.settings.voiceSettings,
         };
     }
-
+    doSynthesizeSpeechStandard(text, options) {
         return this.callAPI(text, options);
     }
+    async doSynthesizeSpeechStreamDuplex(textStream
+    // options?: FunctionOptions | undefined
+    ) {
+        const responseSchema = z.union([
+            z.object({
+                audio: z.string(),
+                isFinal: z.literal(false).nullable(),
+                normalizedAlignment: z
+                    .object({
+                    chars: z.array(z.string()),
+                    charStartTimesMs: z.array(z.number()),
+                    charDurationsMs: z.array(z.number()),
+                })
+                    .nullable(),
+            }),
+            z.object({
+                isFinal: z.literal(true),
+            }),
+            z.object({
+                message: z.string(),
+                error: z.string(),
+                code: z.number(),
+            }),
+        ]);
+        const queue = new AsyncQueue();
+        const model = this.settings.model ?? defaultModel;
+        const socket = await createSimpleWebSocket(`wss://api.elevenlabs.io/v1/text-to-speech/${this.settings.voice}/stream-input?model_id=${model}`);
+        socket.onopen = async () => {
+            const api = this.settings.api ?? new ElevenLabsApiConfiguration();
+            // send begin-of-stream (BOS) message:
+            socket.send(JSON.stringify({
+                // The JS WebSocket API does not support authorization headers, so we send the API key in the BOS message.
+                // See https://stackoverflow.com/questions/4361173/http-headers-in-websockets-client-api
+                xi_api_key: api.apiKey,
+                text: " ",
+                voice_settings: toApiVoiceSettings(this.settings.voiceSettings),
+                generation_config: toGenerationConfig(this.settings.generationConfig),
+            }));
+            // send text in chunks:
+            let textBuffer = "";
+            for await (const textDelta of textStream) {
+                textBuffer += textDelta;
+                // using ". " as separator: sending in full sentences improves the quality
+                // of the audio output significantly.
+                const separator = textBuffer.lastIndexOf(". ");
+                if (separator === -1) {
+                    continue;
+                }
+                const textToProcess = textBuffer.slice(0, separator);
+                textBuffer = textBuffer.slice(separator + 1);
+                socket.send(JSON.stringify({
+                    text: textToProcess,
+                    try_trigger_generation: true,
+                }));
+            }
+            // send remaining text:
+            if (textBuffer.length > 0) {
+                socket.send(JSON.stringify({
+                    text: `${textBuffer} `,
+                    try_trigger_generation: true,
+                }));
+            }
+            // send end-of-stream (EOS) message:
+            socket.send(JSON.stringify({ text: "" }));
+        };
+        socket.onmessage = (event) => {
+            const parseResult = safeParseJsonWithZod(event.data, responseSchema);
+            if (!parseResult.success) {
+                queue.push({ type: "error", error: parseResult.error });
+                return;
+            }
+            const response = parseResult.data;
+            if ("error" in response) {
+                queue.push({ type: "error", error: response });
+                return;
+            }
+            if (!response.isFinal) {
+                queue.push({
+                    type: "delta",
+                    fullDelta: event,
+                    valueDelta: Buffer.from(response.audio, "base64"),
+                });
+            }
+        };
+        socket.onerror = (error) => {
+            queue.push({ type: "error", error });
+        };
+        socket.onclose = () => {
+            queue.close();
+        };
+        return queue;
+    }
     withSettings(additionalSettings) {
         return new ElevenLabsSpeechSynthesisModel({
             ...this.settings,
@@ -58,18 +160,28 @@ async function callElevenLabsTextToSpeechAPI({ api = new ElevenLabsApiConfigurat
         headers: api.headers,
         body: {
             text,
-            model_id: modelId,
-            voice_settings: voiceSettings
-                ? {
-                    stability: voiceSettings.stability,
-                    similarity_boost: voiceSettings.similarityBoost,
-                    style: voiceSettings.style,
-                    use_speaker_boost: voiceSettings.useSpeakerBoost,
-                }
-                : undefined,
+            model_id: modelId ?? defaultModel,
+            voice_settings: toApiVoiceSettings(voiceSettings),
         },
         failedResponseHandler: failedElevenLabsCallResponseHandler,
         successfulResponseHandler: createAudioMpegResponseHandler(),
         abortSignal,
     });
 }
+function toApiVoiceSettings(voiceSettings) {
+    return voiceSettings != null
+        ? {
+            stability: voiceSettings.stability,
+            similarity_boost: voiceSettings.similarityBoost,
+            style: voiceSettings.style,
+            use_speaker_boost: voiceSettings.useSpeakerBoost,
+        }
+        : undefined;
+}
+function toGenerationConfig(generationConfig) {
+    return generationConfig != null
+        ? {
+            chunk_length_schedule: generationConfig.chunkLengthSchedule,
+        }
+        : undefined;
+}
package/model-provider/lmnt/LmntSpeechSynthesisModel.cjs
CHANGED
@@ -43,7 +43,7 @@ class LmntSpeechSynthesisModel extends AbstractModel_js_1.AbstractModel {
             length: this.settings.length,
         };
     }
-
+    doSynthesizeSpeechStandard(text, options) {
         return this.callAPI(text, options);
     }
     withSettings(additionalSettings) {
package/model-provider/lmnt/LmntSpeechSynthesisModel.d.ts
CHANGED
@@ -21,6 +21,6 @@ export declare class LmntSpeechSynthesisModel extends AbstractModel<LmntSpeechSy
     get modelName(): string;
     private callAPI;
     get settingsForEvent(): Partial<LmntSpeechSynthesisModelSettings>;
-
+    doSynthesizeSpeechStandard(text: string, options?: FunctionOptions): Promise<Buffer>;
     withSettings(additionalSettings: Partial<LmntSpeechSynthesisModelSettings>): this;
 }
package/model-provider/lmnt/LmntSpeechSynthesisModel.js
CHANGED
@@ -40,7 +40,7 @@ export class LmntSpeechSynthesisModel extends AbstractModel {
             length: this.settings.length,
         };
     }
-
+    doSynthesizeSpeechStandard(text, options) {
         return this.callAPI(text, options);
     }
     withSettings(additionalSettings) {
package/model-provider/openai/TikTokenTokenizer.cjs
CHANGED
@@ -1,7 +1,13 @@
 "use strict";
+var __importDefault = (this && this.__importDefault) || function (mod) {
+    return (mod && mod.__esModule) ? mod : { "default": mod };
+};
 Object.defineProperty(exports, "__esModule", { value: true });
 exports.TikTokenTokenizer = void 0;
-const
+const lite_1 = require("js-tiktoken/lite");
+const cl100k_base_1 = __importDefault(require("js-tiktoken/ranks/cl100k_base"));
+const p50k_base_1 = __importDefault(require("js-tiktoken/ranks/p50k_base"));
+const r50k_base_1 = __importDefault(require("js-tiktoken/ranks/r50k_base"));
 const never_js_1 = require("../../util/never.cjs");
 /**
  * TikToken tokenizer for OpenAI language models.
@@ -29,9 +35,7 @@ class TikTokenTokenizer {
             writable: true,
             value: void 0
         });
-        this.tiktoken =
-            ? getEncodingNameForModel(options.model)
-            : options.encoding);
+        this.tiktoken = new lite_1.Tiktoken(getTiktokenBPE(options.model));
     }
     async tokenize(text) {
         return this.tiktoken.encode(text);
@@ -50,12 +54,12 @@ class TikTokenTokenizer {
 exports.TikTokenTokenizer = TikTokenTokenizer;
 // implemented here (instead of using js-tiktoken) to be able to quickly updated it
 // when new models are released
-function
+function getTiktokenBPE(model) {
     switch (model) {
         case "code-davinci-002":
         case "text-davinci-002":
         case "text-davinci-003": {
-            return
+            return p50k_base_1.default;
         }
         case "ada":
         case "babbage":
@@ -64,7 +68,7 @@ function getEncodingNameForModel(model) {
         case "text-ada-001":
         case "text-babbage-001":
         case "text-curie-001": {
-            return
+            return r50k_base_1.default;
         }
         case "babbage-002":
         case "davinci-002":
@@ -81,7 +85,7 @@ function getEncodingNameForModel(model) {
         case "gpt-4-32k-0314":
         case "gpt-4-32k-0613":
         case "text-embedding-ada-002": {
-            return
+            return cl100k_base_1.default;
         }
         default: {
             (0, never_js_1.never)(model);
package/model-provider/openai/TikTokenTokenizer.d.ts
CHANGED
@@ -1,4 +1,3 @@
-import { TiktokenEncoding } from "js-tiktoken";
 import { FullTokenizer } from "../../model-function/tokenize-text/Tokenizer.js";
 import { OpenAITextEmbeddingModelType } from "./OpenAITextEmbeddingModel.js";
 import { OpenAITextGenerationBaseModelType } from "./OpenAITextGenerationModel.js";
@@ -24,8 +23,6 @@ export declare class TikTokenTokenizer implements FullTokenizer {
     */
    constructor(options: {
        model: OpenAIChatBaseModelType | OpenAITextGenerationBaseModelType | OpenAITextEmbeddingModelType;
-    } | {
-        encoding: TiktokenEncoding;
    });
    private readonly tiktoken;
    tokenize(text: string): Promise<number[]>;
package/model-provider/openai/TikTokenTokenizer.js
CHANGED
@@ -1,4 +1,7 @@
-import {
+import { Tiktoken } from "js-tiktoken/lite";
+import cl100k_base from "js-tiktoken/ranks/cl100k_base";
+import p50k_base from "js-tiktoken/ranks/p50k_base";
+import r50k_base from "js-tiktoken/ranks/r50k_base";
 import { never } from "../../util/never.js";
 /**
  * TikToken tokenizer for OpenAI language models.
@@ -26,9 +29,7 @@ export class TikTokenTokenizer {
             writable: true,
             value: void 0
         });
-        this.tiktoken =
-            ? getEncodingNameForModel(options.model)
-            : options.encoding);
+        this.tiktoken = new Tiktoken(getTiktokenBPE(options.model));
     }
     async tokenize(text) {
         return this.tiktoken.encode(text);
@@ -46,12 +47,12 @@ export class TikTokenTokenizer {
 }
 // implemented here (instead of using js-tiktoken) to be able to quickly updated it
 // when new models are released
-function
+function getTiktokenBPE(model) {
     switch (model) {
         case "code-davinci-002":
         case "text-davinci-002":
         case "text-davinci-003": {
-            return
+            return p50k_base;
         }
         case "ada":
         case "babbage":
@@ -60,7 +61,7 @@ function getEncodingNameForModel(model) {
         case "text-ada-001":
         case "text-babbage-001":
         case "text-curie-001": {
-            return
+            return r50k_base;
        }
         case "babbage-002":
         case "davinci-002":
@@ -77,7 +78,7 @@ function getEncodingNameForModel(model) {
         case "gpt-4-32k-0314":
         case "gpt-4-32k-0613":
         case "text-embedding-ada-002": {
-            return
+            return cl100k_base;
         }
         default: {
             never(model);
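With the switch to `js-tiktoken/lite`, the BPE rank data (`cl100k_base`, `p50k_base`, `r50k_base`) is imported explicitly and resolved per model, and the previous `encoding` constructor option has been removed. A usage sketch; the assumption that `TikTokenTokenizer` is exported from the package root and the specific model name are illustrative:

```ts
import { TikTokenTokenizer } from "modelfusion"; // assumption: exported from the package root

// The constructor now only accepts a supported OpenAI model name.
const tokenizer = new TikTokenTokenizer({ model: "gpt-3.5-turbo" });

const tokens = await tokenizer.tokenize("Hello, world!");
console.log(tokens.length); // number of tokens for the text
```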
package/package.json
CHANGED
@@ -1,7 +1,7 @@
 {
   "name": "modelfusion",
   "description": "Build multimodal applications, chatbots, and agents with JavaScript and TypeScript.",
-  "version": "0.47.2",
+  "version": "0.48.0",
   "author": "Lars Grammel",
   "license": "MIT",
   "keywords": [
@@ -57,6 +57,7 @@
     "js-tiktoken": "1.0.7",
     "nanoid": "3.3.6",
     "secure-json-parse": "2.7.0",
+    "ws": "8.14.2",
     "zod": "3.22.4",
     "zod-to-json-schema": "3.21.4"
   },
@@ -64,6 +65,7 @@
     "@tsconfig/recommended": "1.0.3",
     "@types/deep-equal": "^1.0.2",
     "@types/node": "18.11.9",
+    "@types/ws": "^8.5.7",
     "@typescript-eslint/eslint-plugin": "^6.1.0",
     "@typescript-eslint/parser": "^6.1.0",
     "copyfiles": "2.4.1",
package/ui/MediaSourceAppender.cjs
ADDED
@@ -0,0 +1,54 @@
+"use strict";
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.MediaSourceAppender = void 0;
+class MediaSourceAppender {
+    constructor(type) {
+        Object.defineProperty(this, "mediaSource", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: new MediaSource()
+        });
+        Object.defineProperty(this, "audioChunks", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: []
+        });
+        Object.defineProperty(this, "sourceBuffer", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.mediaSource.addEventListener("sourceopen", async () => {
+            this.sourceBuffer = this.mediaSource.addSourceBuffer(type);
+            this.sourceBuffer.addEventListener("updateend", () => {
+                this.tryAppendNextChunk();
+            });
+        });
+    }
+    tryAppendNextChunk() {
+        if (this.sourceBuffer != null &&
+            !this.sourceBuffer.updating &&
+            this.audioChunks.length > 0) {
+            this.sourceBuffer.appendBuffer(this.audioChunks.shift());
+        }
+    }
+    addBase64Data(base64Data) {
+        this.addData(Uint8Array.from(atob(base64Data), (char) => char.charCodeAt(0)).buffer);
+    }
+    addData(data) {
+        this.audioChunks.push(data);
+        this.tryAppendNextChunk();
+    }
+    close() {
+        if (this.mediaSource.readyState === "open") {
+            this.mediaSource.endOfStream();
+        }
+    }
+    get mediaSourceUrl() {
+        return URL.createObjectURL(this.mediaSource);
+    }
+}
+exports.MediaSourceAppender = MediaSourceAppender;
package/ui/MediaSourceAppender.d.ts
ADDED
@@ -0,0 +1,11 @@
+export declare class MediaSourceAppender {
+    private readonly mediaSource;
+    private readonly audioChunks;
+    private sourceBuffer?;
+    constructor(type: string);
+    private tryAppendNextChunk;
+    addBase64Data(base64Data: string): void;
+    addData(data: ArrayBuffer): void;
+    close(): void;
+    get mediaSourceUrl(): string;
+}
package/ui/MediaSourceAppender.js
ADDED
@@ -0,0 +1,50 @@
+export class MediaSourceAppender {
+    constructor(type) {
+        Object.defineProperty(this, "mediaSource", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: new MediaSource()
+        });
+        Object.defineProperty(this, "audioChunks", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: []
+        });
+        Object.defineProperty(this, "sourceBuffer", {
+            enumerable: true,
+            configurable: true,
+            writable: true,
+            value: void 0
+        });
+        this.mediaSource.addEventListener("sourceopen", async () => {
+            this.sourceBuffer = this.mediaSource.addSourceBuffer(type);
+            this.sourceBuffer.addEventListener("updateend", () => {
+                this.tryAppendNextChunk();
+            });
+        });
+    }
+    tryAppendNextChunk() {
+        if (this.sourceBuffer != null &&
+            !this.sourceBuffer.updating &&
+            this.audioChunks.length > 0) {
+            this.sourceBuffer.appendBuffer(this.audioChunks.shift());
+        }
+    }
+    addBase64Data(base64Data) {
+        this.addData(Uint8Array.from(atob(base64Data), (char) => char.charCodeAt(0)).buffer);
+    }
+    addData(data) {
+        this.audioChunks.push(data);
+        this.tryAppendNextChunk();
+    }
+    close() {
+        if (this.mediaSource.readyState === "open") {
+            this.mediaSource.endOfStream();
+        }
+    }
+    get mediaSourceUrl() {
+        return URL.createObjectURL(this.mediaSource);
+    }
+}
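The new `MediaSourceAppender` wraps the browser `MediaSource` API so audio chunks arriving from a stream can be appended progressively and played while still downloading. A hedged browser-side sketch; the audio element, its selector, and the `receiveAudioChunks` source are assumptions for illustration only:

```ts
import { MediaSourceAppender } from "modelfusion"; // assumption: exported via the new ui module

// Hypothetical source of base64-encoded MP3 deltas, e.g. from a server event stream.
declare function receiveAudioChunks(): AsyncIterable<string>;

const appender = new MediaSourceAppender("audio/mpeg");

// Point an <audio> element at the MediaSource object URL so playback can start early.
const audioElement = document.querySelector("#speech") as HTMLAudioElement;
audioElement.src = appender.mediaSourceUrl;

for await (const base64Chunk of receiveAudioChunks()) {
  appender.addBase64Data(base64Chunk); // decode and queue the chunk for playback
}
appender.close(); // signal end of stream once all chunks have been appended
```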
package/ui/index.cjs
ADDED
@@ -0,0 +1,17 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __exportStar = (this && this.__exportStar) || function(m, exports) {
+    for (var p in m) if (p !== "default" && !Object.prototype.hasOwnProperty.call(exports, p)) __createBinding(exports, m, p);
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+__exportStar(require("./MediaSourceAppender.cjs"), exports);
package/ui/index.d.ts
ADDED
@@ -0,0 +1 @@
+export * from "./MediaSourceAppender.js";
package/ui/index.js
ADDED
@@ -0,0 +1 @@
+export * from "./MediaSourceAppender.js";
package/util/SimpleWebSocket.cjs
ADDED
@@ -0,0 +1,41 @@
+"use strict";
+var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    var desc = Object.getOwnPropertyDescriptor(m, k);
+    if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
+      desc = { enumerable: true, get: function() { return m[k]; } };
+    }
+    Object.defineProperty(o, k2, desc);
+}) : (function(o, m, k, k2) {
+    if (k2 === undefined) k2 = k;
+    o[k2] = m[k];
+}));
+var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
+    Object.defineProperty(o, "default", { enumerable: true, value: v });
+}) : function(o, v) {
+    o["default"] = v;
+});
+var __importStar = (this && this.__importStar) || function (mod) {
+    if (mod && mod.__esModule) return mod;
+    var result = {};
+    if (mod != null) for (var k in mod) if (k !== "default" && Object.prototype.hasOwnProperty.call(mod, k)) __createBinding(result, mod, k);
+    __setModuleDefault(result, mod);
+    return result;
+};
+Object.defineProperty(exports, "__esModule", { value: true });
+exports.createSimpleWebSocket = void 0;
+/**
+ * Creates a simplified websocket connection. This function works in both Node.js and browser.
+ */
+async function createSimpleWebSocket(url) {
+    if (typeof window === "undefined") {
+        // Use ws library in Node.js:
+        const { default: WebSocket } = await Promise.resolve().then(() => __importStar(require("ws")));
+        return new WebSocket(url);
+    }
+    else {
+        // Use native WebSocket in browser:
+        return new WebSocket(url);
+    }
+}
+exports.createSimpleWebSocket = createSimpleWebSocket;
package/util/SimpleWebSocket.d.ts
ADDED
@@ -0,0 +1,12 @@
+export interface SimpleWebSocket {
+    send(data: string): void;
+    onmessage: ((event: MessageEvent) => void) | null;
+    onopen: ((event: Event) => void) | null;
+    onclose: ((event: CloseEvent) => void) | null;
+    onerror: ((event: Event) => void) | null;
+    close(code?: number, reason?: string): void;
+}
+/**
+ * Creates a simplified websocket connection. This function works in both Node.js and browser.
+ */
+export declare function createSimpleWebSocket(url: string): Promise<SimpleWebSocket>;
package/util/SimpleWebSocket.js
ADDED
@@ -0,0 +1,14 @@
+/**
+ * Creates a simplified websocket connection. This function works in both Node.js and browser.
+ */
+export async function createSimpleWebSocket(url) {
+    if (typeof window === "undefined") {
+        // Use ws library in Node.js:
+        const { default: WebSocket } = await import("ws");
+        return new WebSocket(url);
+    }
+    else {
+        // Use native WebSocket in browser:
+        return new WebSocket(url);
+    }
+}
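`createSimpleWebSocket` gives the ElevenLabs duplex model a single WebSocket surface for both environments: in Node.js it dynamically imports `ws` (the new dependency added in `package.json`), and in the browser it uses the native `WebSocket`. A brief sketch of calling it directly; the import path, URL, and message payloads are illustrative assumptions:

```ts
import { createSimpleWebSocket } from "modelfusion"; // assumption: re-exported from the util module

const socket = await createSimpleWebSocket("wss://example.com/stream"); // placeholder URL

socket.onopen = () => {
  socket.send(JSON.stringify({ text: "hello" })); // hypothetical payload
};
socket.onmessage = (event) => {
  console.log("received:", event.data);
};
socket.onclose = () => {
  console.log("connection closed");
};
```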