@delofarag/ai-utils 1.4.3 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +354 -216
- package/dist/heart/agent.d.ts +1 -1
- package/dist/heart/agent.d.ts.map +1 -1
- package/dist/heart/agent.js +3 -8
- package/dist/heart/agent.js.map +1 -1
- package/dist/heart/chain.d.ts +1 -1
- package/dist/heart/chain.d.ts.map +1 -1
- package/dist/heart/chain.js +4 -4
- package/dist/heart/chain.js.map +1 -1
- package/dist/heart/chatbot.d.ts +1 -1
- package/dist/heart/memorychain.d.ts +2 -2
- package/dist/heart/memorychain.js +6 -6
- package/dist/heart/memorychain.js.map +1 -1
- package/dist/heart/tools/zodios/ZodiosToolRegistry.d.ts +1 -1
- package/dist/heart/tools/zodios/ZodiosToolRegistry.d.ts.map +1 -1
- package/dist/helpers/chatbot.d.ts +33 -0
- package/dist/helpers/chatbot.d.ts.map +1 -0
- package/dist/helpers/chatbot.js +72 -0
- package/dist/helpers/chatbot.js.map +1 -0
- package/dist/helpers/helpers.d.ts +25 -0
- package/dist/helpers/helpers.d.ts.map +1 -0
- package/dist/helpers/helpers.js +43 -0
- package/dist/helpers/helpers.js.map +1 -0
- package/dist/helpers/llms.d.ts +92 -0
- package/dist/helpers/llms.d.ts.map +1 -0
- package/dist/helpers/llms.js +49 -0
- package/dist/helpers/llms.js.map +1 -0
- package/dist/helpers/memory.d.ts +91 -0
- package/dist/helpers/memory.d.ts.map +1 -0
- package/dist/helpers/memory.js +351 -0
- package/dist/helpers/memory.js.map +1 -0
- package/dist/helpers/rag.d.ts +42 -0
- package/dist/helpers/rag.d.ts.map +1 -0
- package/dist/helpers/rag.js +66 -0
- package/dist/helpers/rag.js.map +1 -0
- package/dist/helpers.d.ts +3 -3
- package/dist/index.d.ts +10 -9
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +10 -9
- package/dist/index.js.map +1 -1
- package/dist/magic-funcs/answerers/ask.d.ts +18 -2
- package/dist/magic-funcs/answerers/ask.d.ts.map +1 -1
- package/dist/magic-funcs/answerers/ask.js +20 -9
- package/dist/magic-funcs/answerers/ask.js.map +1 -1
- package/dist/magic-funcs/answerers/websearch.d.ts +15 -0
- package/dist/magic-funcs/answerers/websearch.d.ts.map +1 -1
- package/dist/magic-funcs/answerers/websearch.js +15 -0
- package/dist/magic-funcs/answerers/websearch.js.map +1 -1
- package/dist/magic-funcs/evaluators/classify.d.ts +20 -1
- package/dist/magic-funcs/evaluators/classify.d.ts.map +1 -1
- package/dist/magic-funcs/evaluators/classify.js +22 -3
- package/dist/magic-funcs/evaluators/classify.js.map +1 -1
- package/dist/magic-funcs/evaluators/decide.d.ts +18 -0
- package/dist/magic-funcs/evaluators/decide.d.ts.map +1 -1
- package/dist/magic-funcs/evaluators/decide.js +21 -3
- package/dist/magic-funcs/evaluators/decide.js.map +1 -1
- package/dist/magic-funcs/optimizers/promptify.d.ts +18 -0
- package/dist/magic-funcs/optimizers/promptify.d.ts.map +1 -1
- package/dist/magic-funcs/optimizers/promptify.js +21 -3
- package/dist/magic-funcs/optimizers/promptify.js.map +1 -1
- package/dist/magic-funcs/optimizers/ragify.d.ts +16 -0
- package/dist/magic-funcs/optimizers/ragify.d.ts.map +1 -1
- package/dist/magic-funcs/optimizers/ragify.js +19 -3
- package/dist/magic-funcs/optimizers/ragify.js.map +1 -1
- package/dist/magic-funcs/parsers/extract.d.ts +23 -0
- package/dist/magic-funcs/parsers/extract.d.ts.map +1 -1
- package/dist/magic-funcs/parsers/extract.js +26 -3
- package/dist/magic-funcs/parsers/extract.js.map +1 -1
- package/dist/magic-funcs/parsers/rewrite.d.ts +19 -0
- package/dist/magic-funcs/parsers/rewrite.d.ts.map +1 -1
- package/dist/magic-funcs/parsers/rewrite.js +22 -2
- package/dist/magic-funcs/parsers/rewrite.js.map +1 -1
- package/dist/magic-funcs/parsers/structure.d.ts +22 -0
- package/dist/magic-funcs/parsers/structure.d.ts.map +1 -1
- package/dist/magic-funcs/parsers/structure.js +27 -4
- package/dist/magic-funcs/parsers/structure.js.map +1 -1
- package/dist/magic-funcs/parsers/summarize.d.ts +20 -0
- package/dist/magic-funcs/parsers/summarize.d.ts.map +1 -1
- package/dist/magic-funcs/parsers/summarize.js +23 -3
- package/dist/magic-funcs/parsers/summarize.js.map +1 -1
- package/dist/memory.d.ts +1 -1
- package/dist/modalities/image-gen/generateImages.d.ts +56 -0
- package/dist/modalities/image-gen/generateImages.d.ts.map +1 -0
- package/dist/modalities/image-gen/generateImages.js +66 -0
- package/dist/modalities/image-gen/generateImages.js.map +1 -0
- package/dist/modalities/image-gen/helpers.d.ts +15 -0
- package/dist/modalities/image-gen/helpers.d.ts.map +1 -0
- package/dist/modalities/image-gen/helpers.js +17 -0
- package/dist/modalities/image-gen/helpers.js.map +1 -0
- package/dist/modalities/openrouter.d.ts +11 -0
- package/dist/modalities/openrouter.d.ts.map +1 -0
- package/dist/modalities/openrouter.js +39 -0
- package/dist/modalities/openrouter.js.map +1 -0
- package/dist/modalities/speech/VoiceEngine.d.ts +3 -0
- package/dist/modalities/speech/VoiceEngine.d.ts.map +1 -0
- package/dist/modalities/speech/VoiceEngine.js +3 -0
- package/dist/modalities/speech/VoiceEngine.js.map +1 -0
- package/dist/modalities/speech/stt/helpers.d.ts +3 -0
- package/dist/modalities/speech/stt/helpers.d.ts.map +1 -0
- package/dist/modalities/speech/stt/helpers.js +28 -0
- package/dist/modalities/speech/stt/helpers.js.map +1 -0
- package/dist/modalities/speech/stt/stt.d.ts +113 -0
- package/dist/modalities/speech/stt/stt.d.ts.map +1 -0
- package/dist/modalities/speech/stt/stt.js +177 -0
- package/dist/modalities/speech/stt/stt.js.map +1 -0
- package/dist/modalities/speech/tts/helpers.d.ts +12 -0
- package/dist/modalities/speech/tts/helpers.d.ts.map +1 -0
- package/dist/modalities/speech/tts/helpers.js +10 -0
- package/dist/modalities/speech/tts/helpers.js.map +1 -0
- package/dist/modalities/speech/tts/tts.d.ts +115 -0
- package/dist/modalities/speech/tts/tts.d.ts.map +1 -0
- package/dist/modalities/speech/tts/tts.js +142 -0
- package/dist/modalities/speech/tts/tts.js.map +1 -0
- package/dist/modalities/vision/helpers.d.ts +3 -0
- package/dist/modalities/vision/helpers.d.ts.map +1 -0
- package/dist/modalities/vision/helpers.js +7 -0
- package/dist/modalities/vision/helpers.js.map +1 -0
- package/dist/modalities/vision/vision.d.ts +55 -0
- package/dist/modalities/vision/vision.d.ts.map +1 -0
- package/dist/modalities/vision/vision.js +71 -0
- package/dist/modalities/vision/vision.js.map +1 -0
- package/package.json +7 -7
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
import { OpenRouterSTTModel } from "../../../helpers/llms";
import { LLMInstance } from "../../openrouter";
import { STTAudioInput } from "./helpers";
type STTOptions = {
    llm?: LLMInstance;
    audio: STTAudioInput;
    prompt?: string;
    audioFormat?: "wav" | "mp3" | "aiff" | "aac" | "ogg" | "flac" | "m4a" | "pcm16" | "pcm24";
    model?: OpenRouterSTTModel;
};
type PhoneSocketChunk = string | Buffer | Uint8Array;
type STTPhoneSocketSessionOptions = {
    llm?: LLMInstance;
    prompt?: string;
    audioFormat?: "wav" | "mp3" | "aiff" | "aac" | "ogg" | "flac" | "m4a" | "pcm16" | "pcm24";
    model?: OpenRouterSTTModel;
    flushIntervalMs?: number;
    minBufferBytes?: number;
    onTranscription?: (result: {
        text: string;
        raw: unknown;
    }) => Promise<void> | void;
};
/**
 * Transcribes audio to text using an OpenRouter speech-capable model.
 *
 * Internally this function sends an `input_audio` payload through the OpenRouter
 * chat-completions-compatible flow. If no `llm` is provided, it builds one with
 * `getLLM({ provider: "openrouter", type: "stt" })` and reads
 * `process.env.OPENROUTER_API_KEY`.
 *
 * Make sure `OPENROUTER_API_KEY` is set in your `.env`.
 *
 * @param params.llm Optional LLM instance from `getLLM(...)`.
 * @param params.audio Audio source as file path, base64 string, `Buffer`, or `Uint8Array`.
 * @param params.prompt Optional instruction text for the transcription model.
 * @param params.audioFormat Audio format sent to the model (for example `wav`, `mp3`, `ogg`).
 * @param params.model Optional model override for this call.
 * @returns Promise with normalized `text` and full provider response in `raw`.
 *
 * @example
 * CONFIG:
 * ```ts
 * stt({
 *     llm = getLLM({ provider: "openrouter", type: "stt" }),
 *     prompt = "Please transcribe this audio file.",
 *     audioFormat = "wav",
 *     model,
 *     audio
 * })
 * ```
 *
 * @example
 * ```ts
 * const result = await stt({
 *     audio: "./audio/meeting.wav",
 *     prompt: "Transcribe this in German."
 * });
 *
 * console.log(result.text);
 * ```
 */
export declare function stt({ llm, prompt, audioFormat, model, audio }: STTOptions): Promise<{
    text: string;
    raw: unknown;
}>;
/**
 * Creates a lightweight live STT session for phone sockets (for example Twilio/Telnyx media chunks).
 *
 * Internally this helper buffers incoming live audio frames and repeatedly calls `stt()`
 * to emit near-real-time transcriptions through `onTranscription`.
 *
 * @param params.llm Optional LLM instance from `getLLM(...)`.
 * @param params.prompt Optional transcription instruction prompt.
 * @param params.audioFormat Audio format for incoming phone media chunks.
 * @param params.model Optional model override for this session.
 * @param params.flushIntervalMs Interval in milliseconds for automatic transcription flushes.
 * @param params.minBufferBytes Minimum buffered bytes required before transcribing.
 * @param params.onTranscription Optional callback that receives each partial transcription.
 * @returns Session controller with `pushChunk`, `flush`, and `stop`.
 *
 * @example
 * CONFIG:
 * ```ts
 * createSTTPhoneSocketSession({
 *     llm = getLLM({ provider: "openrouter", type: "stt" }),
 *     flushIntervalMs = 2000,
 *     minBufferBytes = 1024,
 *     prompt = "Please transcribe",
 *     audioFormat = "wav",
 *     model,
 *     onTranscription
 * })
 * ```
 *
 * @example
 * ```ts
 * const session = createSTTPhoneSocketSession({
 *     audioFormat: "pcm16",
 *     onTranscription: ({ text }) => console.log(text)
 * });
 *
 * session.pushChunk(mediaPayloadBase64);
 * await session.stop();
 * ```
 */
export declare function createSTTPhoneSocketSession({ llm, flushIntervalMs, minBufferBytes, prompt, audioFormat, model, onTranscription }: STTPhoneSocketSessionOptions): {
    pushChunk(chunk: PhoneSocketChunk): void;
    flush(): Promise<void>;
    stop(): Promise<void>;
};
export {};
//# sourceMappingURL=stt.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stt.d.ts","sourceRoot":"","sources":["../../../../src/modalities/speech/stt/stt.ts"],"names":[],"mappings":"AACA,OAAO,EAAU,kBAAkB,EAAE,MAAM,uBAAuB,CAAA;AAClE,OAAO,EAAE,WAAW,EAAwB,MAAM,kBAAkB,CAAA;AACpE,OAAO,EAAE,aAAa,EAAiB,MAAM,WAAW,CAAA;AAExD,KAAK,UAAU,GAAG;IACd,GAAG,CAAC,EAAE,WAAW,CAAA;IACjB,KAAK,EAAE,aAAa,CAAA;IACpB,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,WAAW,CAAC,EAAE,KAAK,GAAG,KAAK,GAAG,MAAM,GAAG,KAAK,GAAG,KAAK,GAAG,MAAM,GAAG,KAAK,GAAG,OAAO,GAAG,OAAO,CAAA;IACzF,KAAK,CAAC,EAAE,kBAAkB,CAAA;CAC7B,CAAA;AAED,KAAK,gBAAgB,GAAG,MAAM,GAAG,MAAM,GAAG,UAAU,CAAA;AAEpD,KAAK,4BAA4B,GAAG;IAChC,GAAG,CAAC,EAAE,WAAW,CAAA;IACjB,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,WAAW,CAAC,EAAE,KAAK,GAAG,KAAK,GAAG,MAAM,GAAG,KAAK,GAAG,KAAK,GAAG,MAAM,GAAG,KAAK,GAAG,OAAO,GAAG,OAAO,CAAA;IACzF,KAAK,CAAC,EAAE,kBAAkB,CAAA;IAC1B,eAAe,CAAC,EAAE,MAAM,CAAA;IACxB,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,eAAe,CAAC,EAAE,CAAC,MAAM,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,GAAG,EAAE,OAAO,CAAA;KAAE,KAAK,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAA;CACrF,CAAA;AAcD;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AACH,wBAAsB,GAAG,CAAC,EACtB,GAAqD,EACrD,MAA6C,EAC7C,WAAmB,EACnB,KAAK,EACL,KAAK,EACR,EAAE,UAAU,GAAG,OAAO,CAAC;IAAE,IAAI,EAAE,MAAM,CAAC;IAAC,GAAG,EAAE,OAAO,CAAA;CAAE,CAAC,CAqCtD;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuCG;AACH,wBAAgB,2BAA2B,CAAC,EACxC,GAAqD,EACrD,eAAsB,EACtB,cAAqB,EACrB,MAA4B,EAC5B,WAAmB,EACnB,KAAK,EACL,eAAe,EAClB,EAAE,4BAA4B;qBAyCN,gBAAgB;;;EAexC"}
|
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
import { HumanMessage } from "../../../imports";
|
|
2
|
+
import { getLLM } from "../../../helpers/llms";
|
|
3
|
+
import { getOpenRouterRuntime } from "../../openrouter";
|
|
4
|
+
import { toBase64Audio } from "./helpers";
|
|
5
|
+
/**
 * Coerces a phone-socket media frame into a Buffer.
 *
 * Strings are treated as base64-encoded audio (the usual Twilio/Telnyx media
 * payload shape); an existing Buffer is passed through untouched; any other
 * binary view (e.g. Uint8Array) is wrapped without copying semantics changing.
 *
 * @param chunk Incoming frame: base64 string, Buffer, or Uint8Array.
 * @returns A Buffer holding the raw audio bytes.
 */
function normalizePhoneSocketChunk(chunk) {
    return typeof chunk === "string"
        ? Buffer.from(chunk, "base64")
        : Buffer.isBuffer(chunk)
            ? chunk
            : Buffer.from(chunk);
}
|
|
14
|
+
/**
 * Transcribes audio to text using an OpenRouter speech-capable model.
 *
 * Sends an `input_audio` payload through the OpenRouter
 * chat-completions-compatible flow. When no `llm` is supplied, one is created
 * via `getLLM({ provider: "openrouter", type: "stt" })`, which reads
 * `process.env.OPENROUTER_API_KEY` — make sure it is set in your `.env`.
 *
 * @param params.llm Optional LLM instance from `getLLM(...)`.
 * @param params.audio Audio source as file path, base64 string, `Buffer`, or `Uint8Array`.
 * @param params.prompt Optional instruction text for the transcription model.
 * @param params.audioFormat Audio format sent to the model (for example `wav`, `mp3`, `ogg`).
 * @param params.model Optional model override for this call.
 * @returns Promise with normalized `text` and the full provider response in `raw`.
 *
 * @example
 * ```ts
 * const result = await stt({
 *     audio: "./audio/meeting.wav",
 *     prompt: "Transcribe this in German."
 * });
 *
 * console.log(result.text);
 * ```
 */
export async function stt({ llm = getLLM({ provider: "openrouter", type: "stt" }), prompt = "Please transcribe this audio file.", audioFormat = "wav", model, audio }) {
    const runtime = getOpenRouterRuntime(llm);
    const data = await toBase64Audio(audio);
    // Multimodal message: instruction text plus the base64 audio payload.
    const message = new HumanMessage({
        content: [
            { type: "text", text: prompt },
            {
                type: "input_audio",
                input_audio: { data, format: audioFormat }
            }
        ]
    });
    // Only rebuild the LLM when an explicit, different model override was given.
    const overrideRequested = Boolean(model) && model !== runtime.model;
    const activeLLM = overrideRequested
        ? getLLM({
            provider: "openrouter",
            type: "stt",
            model,
            apikey: runtime.apiKey
        })
        : llm;
    const response = await activeLLM.invoke([message]);
    // Providers may return structured content; normalize to a plain string.
    const text = typeof response.content === "string"
        ? response.content
        : JSON.stringify(response.content);
    return { text, raw: response };
}
|
|
86
|
+
/**
 * Creates a lightweight live STT session for phone sockets (for example Twilio/Telnyx media chunks).
 *
 * Internally this helper buffers incoming live audio frames and repeatedly calls `stt()`
 * to emit near-real-time transcriptions through `onTranscription`.
 *
 * @param params.llm Optional LLM instance from `getLLM(...)`.
 * @param params.prompt Optional transcription instruction prompt.
 * @param params.audioFormat Audio format for incoming phone media chunks.
 * @param params.model Optional model override for this session.
 * @param params.flushIntervalMs Interval in milliseconds for automatic transcription flushes.
 * @param params.minBufferBytes Minimum buffered bytes required before transcribing.
 * @param params.onTranscription Optional callback that receives each partial transcription.
 * @returns Session controller with `pushChunk`, `flush`, and `stop`.
 *
 * @example
 * ```ts
 * const session = createSTTPhoneSocketSession({
 *     audioFormat: "pcm16",
 *     onTranscription: ({ text }) => console.log(text)
 * });
 *
 * session.pushChunk(mediaPayloadBase64);
 * await session.stop();
 * ```
 */
export function createSTTPhoneSocketSession({ llm = getLLM({ provider: "openrouter", type: "stt" }), flushIntervalMs = 2000, minBufferBytes = 1024, prompt = "Please transcribe", audioFormat = "wav", model, onTranscription }) {
    const chunks = [];
    let isFlushing = false;
    let isStopped = false;
    // `force` bypasses the stopped/min-buffer guards so that stop() can drain
    // whatever audio remains, even below the minBufferBytes threshold.
    // BUGFIX: previously stop() set isStopped before awaiting flush(), and
    // flush() returned immediately when isStopped — so the final drain never
    // ran and buffered audio at stop time was silently dropped.
    const flush = async (force = false) => {
        if (isFlushing || (isStopped && !force)) {
            return;
        }
        const totalBytes = chunks.reduce((sum, chunk) => sum + chunk.length, 0);
        // Never transcribe an empty buffer; respect the threshold unless forced.
        if (totalBytes === 0 || (totalBytes < minBufferBytes && !force)) {
            return;
        }
        isFlushing = true;
        const merged = Buffer.concat(chunks);
        chunks.length = 0;
        try {
            const result = await stt({
                llm,
                prompt,
                audioFormat,
                model,
                audio: merged
            });
            if (onTranscription) {
                await onTranscription(result);
            }
        }
        finally {
            // Always release the flush lock, even when stt() or the callback throws.
            isFlushing = false;
        }
    };
    const timer = setInterval(() => {
        void flush();
    }, flushIntervalMs);
    return {
        /** Queues one incoming media frame; ignored after stop(). */
        pushChunk(chunk) {
            if (isStopped) {
                return;
            }
            chunks.push(normalizePhoneSocketChunk(chunk));
        },
        /** Manually triggers a transcription flush (no-op below minBufferBytes). */
        async flush() {
            await flush();
        },
        /** Stops the timer, rejects further chunks, and drains remaining audio. */
        async stop() {
            if (isStopped) {
                return;
            }
            isStopped = true;
            clearInterval(timer);
            // Forced final drain: transcribe whatever is left in the buffer.
            await flush(true);
        }
    };
}
|
|
177
|
+
//# sourceMappingURL=stt.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stt.js","sourceRoot":"","sources":["../../../../src/modalities/speech/stt/stt.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,kBAAkB,CAAA;AAC/C,OAAO,EAAE,MAAM,EAAsB,MAAM,uBAAuB,CAAA;AAClE,OAAO,EAAe,oBAAoB,EAAE,MAAM,kBAAkB,CAAA;AACpE,OAAO,EAAiB,aAAa,EAAE,MAAM,WAAW,CAAA;AAsBxD,SAAS,yBAAyB,CAAC,KAAuB;IACtD,IAAI,OAAO,KAAK,KAAK,QAAQ,EAAE,CAAC;QAC5B,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,EAAE,QAAQ,CAAC,CAAA;IACvC,CAAC;IAED,IAAI,MAAM,CAAC,QAAQ,CAAC,KAAK,CAAC,EAAE,CAAC;QACzB,OAAO,KAAK,CAAA;IAChB,CAAC;IAED,OAAO,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAA;AAC7B,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAsCG;AACH,MAAM,CAAC,KAAK,UAAU,GAAG,CAAC,EACtB,GAAG,GAAG,MAAM,CAAC,EAAE,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,EACrD,MAAM,GAAG,oCAAoC,EAC7C,WAAW,GAAG,KAAK,EACnB,KAAK,EACL,KAAK,EACI;IACT,MAAM,OAAO,GAAG,oBAAoB,CAAC,GAAG,CAAC,CAAA;IACzC,MAAM,WAAW,GAAG,MAAM,aAAa,CAAC,KAAK,CAAC,CAAA;IAE9C,MAAM,OAAO,GAAG,IAAI,YAAY,CAAC;QAC7B,OAAO,EAAE;YACL;gBACI,IAAI,EAAE,MAAM;gBACZ,IAAI,EAAE,MAAM;aACf;YACD;gBACI,IAAI,EAAE,aAAa;gBACnB,WAAW,EAAE;oBACT,IAAI,EAAE,WAAW;oBACjB,MAAM,EAAE,WAAW;iBACtB;aACJ;SACJ;KACJ,CAAC,CAAA;IAEF,MAAM,UAAU,GACZ,KAAK,IAAI,KAAK,KAAK,OAAO,CAAC,KAAK;QAC5B,CAAC,CAAC,MAAM,CAAC;YACH,QAAQ,EAAE,YAAY;YACtB,IAAI,EAAE,KAAK;YACX,KAAK;YACL,MAAM,EAAE,OAAO,CAAC,MAAM;SACzB,CAAC;QACJ,CAAC,CAAC,GAAG,CAAA;IAEb,MAAM,QAAQ,GAAG,MAAM,UAAU,CAAC,MAAM,CAAC,CAAC,OAAO,CAAC,CAAC,CAAA;IACnD,MAAM,IAAI,GAAG,OAAO,QAAQ,CAAC,OAAO,KAAK,QAAQ,CAAC,CAAC,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAA;IAEvG,OAAO;QACH,IAAI;QACJ,GAAG,EAAE,QAAQ;KAChB,CAAA;AACL,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuCG;AACH,MAAM,UAAU,2BAA2B,CAAC,EACxC,GAAG,GAAG,MAAM,CAAC,EAAE,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,EACrD,eAAe,GAAG,IAAI,EACtB,cAAc,GAAG,IAAI,EACrB,MAAM,GAAG,mBAAmB,EAC5B,WAAW,GAAG,KAAK,EACnB,KAAK,EACL,eAAe,EACY;IAC3B,MAAM,MAAM,GAAa,EAAE,CAAA;IAC3B,IAAI,UAAU,GAAG,KAAK,CAAA;IACtB,IAAI,SAAS,GAAG,KAAK,CAAA;IAErB,MAAM,KAAK,GAAG,KAAK,IAAI,EAAE;QACrB,IAAI,
UAAU,IAAI,SAAS,EAAE,CAAC;YAC1B,OAAM;QACV,CAAC;QAED,MAAM,UAAU,GAAG,MAAM,CAAC,MAAM,CAAC,CAAC,GAAG,EAAE,KAAK,EAAE,EAAE,CAAC,GAAG,GAAG,KAAK,CAAC,MAAM,EAAE,CAAC,CAAC,CAAA;QACvE,IAAI,UAAU,GAAG,cAAc,EAAE,CAAC;YAC9B,OAAM;QACV,CAAC;QAED,UAAU,GAAG,IAAI,CAAA;QACjB,MAAM,MAAM,GAAG,MAAM,CAAC,MAAM,CAAC,MAAM,CAAC,CAAA;QACpC,MAAM,CAAC,MAAM,GAAG,CAAC,CAAA;QAEjB,IAAI,CAAC;YACD,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC;gBACrB,GAAG;gBACH,MAAM;gBACN,WAAW;gBACX,KAAK;gBACL,KAAK,EAAE,MAAM;aAChB,CAAC,CAAA;YAEF,IAAI,eAAe,EAAE,CAAC;gBAClB,MAAM,eAAe,CAAC,MAAM,CAAC,CAAA;YACjC,CAAC;QACL,CAAC;gBAAS,CAAC;YACP,UAAU,GAAG,KAAK,CAAA;QACtB,CAAC;IACL,CAAC,CAAA;IAED,MAAM,KAAK,GAAG,WAAW,CAAC,GAAG,EAAE;QAC3B,KAAK,KAAK,EAAE,CAAA;IAChB,CAAC,EAAE,eAAe,CAAC,CAAA;IAEnB,OAAO;QACH,SAAS,CAAC,KAAuB;YAC7B,IAAI,SAAS,EAAE,CAAC;gBACZ,OAAM;YACV,CAAC;YACD,MAAM,CAAC,IAAI,CAAC,yBAAyB,CAAC,KAAK,CAAC,CAAC,CAAA;QACjD,CAAC;QACD,KAAK,CAAC,KAAK;YACP,MAAM,KAAK,EAAE,CAAA;QACjB,CAAC;QACD,KAAK,CAAC,IAAI;YACN,SAAS,GAAG,IAAI,CAAA;YAChB,aAAa,CAAC,KAAK,CAAC,CAAA;YACpB,MAAM,KAAK,EAAE,CAAA;QACjB,CAAC;KACJ,CAAA;AACL,CAAC"}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
import { AutoComplete } from "../../../helpers/llms";
/** Audio container formats the TTS endpoint can return. */
export type TTSResponseFormat = "mp3" | "pcm";
/** Known TTS voice identifiers (presumably `AutoComplete` also admits arbitrary strings — confirm against the helper's definition). */
export type OpenRouterTTSVoice = AutoComplete<"alloy" | "ash" | "ballad" | "cedar" | "coral" | "echo" | "fable" | "marin" | "nova" | "onyx" | "sage" | "shimmer" | "verse">;
/** Request body shape sent to the `/audio/speech` endpoint. */
export type TTSPayload = {
    model: string;
    input: string;
    voice: OpenRouterTTSVoice;
    response_format?: TTSResponseFormat;
    speed?: number;
};
/**
 * Normalizes a TTS request payload (the implementation defaults `response_format`
 * to `"mp3"` when unset).
 *
 * @param payload Raw payload with model, input text, voice, and optional format/speed.
 * @returns A normalized copy of the payload.
 */
export declare function createTTSPayload(payload: TTSPayload): TTSPayload;
//# sourceMappingURL=helpers.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../../../src/modalities/speech/tts/helpers.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,uBAAuB,CAAA;AAEpD,MAAM,MAAM,iBAAiB,GAAG,KAAK,GAAG,KAAK,CAAA;AAC7C,MAAM,MAAM,kBAAkB,GAAG,YAAY,CACvC,OAAO,GACP,KAAK,GACL,QAAQ,GACR,OAAO,GACP,OAAO,GACP,MAAM,GACN,OAAO,GACP,OAAO,GACP,MAAM,GACN,MAAM,GACN,MAAM,GACN,SAAS,GACT,OAAO,CACZ,CAAA;AAED,MAAM,MAAM,UAAU,GAAG;IACrB,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,MAAM,CAAA;IACb,KAAK,EAAE,kBAAkB,CAAA;IACzB,eAAe,CAAC,EAAE,iBAAiB,CAAA;IACnC,KAAK,CAAC,EAAE,MAAM,CAAA;CACjB,CAAA;AAED,wBAAgB,gBAAgB,CAAC,OAAO,EAAE,UAAU,GAAG,UAAU,CAQhE"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"helpers.js","sourceRoot":"","sources":["../../../../src/modalities/speech/tts/helpers.ts"],"names":[],"mappings":"AA2BA,MAAM,UAAU,gBAAgB,CAAC,OAAmB;IAChD,OAAO;QACH,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,eAAe,EAAE,OAAO,CAAC,eAAe,IAAI,KAAK;QACjD,KAAK,EAAE,OAAO,CAAC,KAAK;KACvB,CAAA;AACL,CAAC"}
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
import { LLMInstance } from "../../openrouter";
import { OpenRouterTTSVoice, TTSResponseFormat } from "./helpers";
type TTSOptions = {
    llm?: LLMInstance;
    text: string;
    model: OpenRouterTTSVoice;
    responseFormat?: TTSResponseFormat;
    speed?: number;
};
type TTSPhoneSocketStreamOptions = TTSOptions & {
    chunkSizeBytes?: number;
    emitBase64?: boolean;
    onChunk: (chunk: Uint8Array | string, index: number) => Promise<void> | void;
};
/**
 * Converts text to speech via OpenRouter's dedicated `/audio/speech` endpoint.
 *
 * Internally this function calls OpenRouter directly and returns raw audio bytes.
 * If no `llm` is provided, it creates one with
 * `getLLM({ provider: "openrouter", type: "tts" })` and reads
 * `process.env.OPENROUTER_API_KEY`.
 *
 * Make sure `OPENROUTER_API_KEY` is set in your `.env`.
 *
 * @param params.llm Optional LLM instance from `getLLM(...)`.
 * @param params.text Text that should be synthesized to audio.
 * @param params.model Voice identifier to use for speech generation.
 * @param params.responseFormat Audio response format (`mp3` or `pcm`).
 * @param params.speed Playback speed multiplier (default `1.0`).
 * @returns Promise with `audioBytes` and optional response metadata headers.
 *
 * @example
 * CONFIG:
 * ```ts
 * tts({
 *     llm = getLLM({ provider: "openrouter", type: "tts" }),
 *     responseFormat = "mp3",
 *     speed = 1.0,
 *     text,
 *     model
 * })
 * ```
 *
 * @example
 * ```ts
 * const speech = await tts({
 *     text: "Hallo! Das ist ein Test.",
 *     model: "nova",
 *     responseFormat: "mp3"
 * });
 *
 * console.log(speech.audioBytes.length);
 * ```
 */
export declare function tts({ llm, responseFormat, speed, text, model }: TTSOptions): Promise<{
    audioBytes: Uint8Array;
    contentType?: string;
    generationId?: string;
}>;
/**
 * Streams synthesized TTS audio as sequential chunks for phone socket pipelines
 * (for example Twilio/Telnyx websocket media send loops).
 *
 * Internally this helper calls `tts()` once and splits the returned audio into
 * chunked payloads that can be forwarded to a live call socket.
 *
 * @param params.llm Optional LLM instance from `getLLM(...)`.
 * @param params.text Text that should be synthesized to audio.
 * @param params.model Voice identifier for speech generation.
 * @param params.responseFormat Audio response format (`mp3` or `pcm`).
 * @param params.speed Playback speed multiplier.
 * @param params.chunkSizeBytes Size of each emitted audio chunk.
 * @param params.emitBase64 If true, emits base64 chunks; otherwise emits `Uint8Array`.
 * @param params.onChunk Callback that receives each emitted chunk in order.
 * @returns Promise with total emitted chunks and response metadata.
 *
 * @example
 * CONFIG:
 * ```ts
 * streamTTSOverPhoneSocket({
 *     llm = getLLM({ provider: "openrouter", type: "tts" }),
 *     speed = 1.0,
 *     chunkSizeBytes = 3200,
 *     emitBase64 = true,
 *     responseFormat = "mp3",
 *     text,
 *     model,
 *     onChunk
 * })
 * ```
 *
 * @example
 * ```ts
 * await streamTTSOverPhoneSocket({
 *     model,
 *     onChunk
 * })
 * ```
 *
 * @example
 * ```ts
 * await streamTTSOverPhoneSocket({
 *     text: "Willkommen beim Support.",
 *     model: "nova",
 *     onChunk: async (chunk) => socket.send(String(chunk))
 * });
 * ```
 */
export declare function streamTTSOverPhoneSocket({ llm, speed, chunkSizeBytes, emitBase64, responseFormat, text, model, onChunk }: TTSPhoneSocketStreamOptions): Promise<{
    totalChunks: number;
    contentType?: string;
    generationId?: string;
}>;
export {};
//# sourceMappingURL=tts.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tts.d.ts","sourceRoot":"","sources":["../../../../src/modalities/speech/tts/tts.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,WAAW,EAAwB,MAAM,kBAAkB,CAAA;AACpE,OAAO,EAAoB,kBAAkB,EAAE,iBAAiB,EAAE,MAAM,WAAW,CAAA;AAEnF,KAAK,UAAU,GAAG;IACd,GAAG,CAAC,EAAE,WAAW,CAAA;IACjB,IAAI,EAAE,MAAM,CAAA;IACZ,KAAK,EAAE,kBAAkB,CAAA;IACzB,cAAc,CAAC,EAAE,iBAAiB,CAAA;IAClC,KAAK,CAAC,EAAE,MAAM,CAAA;CACjB,CAAA;AAED,KAAK,2BAA2B,GAAG,UAAU,GAAG;IAC5C,cAAc,CAAC,EAAE,MAAM,CAAA;IACvB,UAAU,CAAC,EAAE,OAAO,CAAA;IACpB,OAAO,EAAE,CAAC,KAAK,EAAE,UAAU,GAAG,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,OAAO,CAAC,IAAI,CAAC,GAAG,IAAI,CAAA;CAC/E,CAAA;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuCG;AACH,wBAAsB,GAAG,CAAC,EACtB,GAAqD,EACrD,cAAsB,EACtB,KAAW,EACX,IAAI,EACJ,KAAK,EACR,EAAE,UAAU,GAAG,OAAO,CAAC;IAAE,UAAU,EAAE,UAAU,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,YAAY,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,CA+B/F;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgDG;AACH,wBAAsB,wBAAwB,CAAC,EAC3C,GAAqD,EACrD,KAAW,EACX,cAAqB,EACrB,UAAiB,EACjB,cAAsB,EACtB,IAAI,EACJ,KAAK,EACL,OAAO,EACV,EAAE,2BAA2B,GAAG,OAAO,CAAC;IAAE,WAAW,EAAE,MAAM,CAAC;IAAC,WAAW,CAAC,EAAE,MAAM,CAAC;IAAC,YAAY,CAAC,EAAE,MAAM,CAAA;CAAE,CAAC,CAsB7G"}
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
import { getLLM } from "../../../helpers/llms";
|
|
2
|
+
import { getOpenRouterRuntime } from "../../openrouter";
|
|
3
|
+
import { createTTSPayload } from "./helpers";
|
|
4
|
+
/**
 * Converts text to speech via OpenRouter's dedicated `/audio/speech` endpoint.
 *
 * Calls OpenRouter directly with a JSON payload and returns the raw audio
 * bytes. When no `llm` is supplied, one is created via
 * `getLLM({ provider: "openrouter", type: "tts" })`, which reads
 * `process.env.OPENROUTER_API_KEY` — make sure it is set in your `.env`.
 *
 * @param params.llm Optional LLM instance from `getLLM(...)`.
 * @param params.text Text that should be synthesized to audio.
 * @param params.model Voice identifier to use for speech generation.
 * @param params.responseFormat Audio response format (`mp3` or `pcm`).
 * @param params.speed Playback speed multiplier (default `1.0`).
 * @returns Promise with `audioBytes` and optional response metadata headers.
 *
 * @example
 * ```ts
 * const speech = await tts({
 *     text: "Hallo! Das ist ein Test.",
 *     model: "nova",
 *     responseFormat: "mp3"
 * });
 *
 * console.log(speech.audioBytes.length);
 * ```
 */
export async function tts({ llm = getLLM({ provider: "openrouter", type: "tts" }), responseFormat = "mp3", speed = 1.0, text, model }) {
    const runtime = getOpenRouterRuntime(llm);
    // Note: `runtime.model` is the TTS model id; the `model` option carries the voice.
    const requestBody = createTTSPayload({
        model: runtime.model,
        input: text,
        voice: model,
        response_format: responseFormat,
        speed
    });
    const response = await fetch(`${runtime.baseURL}/audio/speech`, {
        method: "POST",
        headers: {
            Authorization: `Bearer ${runtime.apiKey}`,
            "Content-Type": "application/json"
        },
        body: JSON.stringify(requestBody)
    });
    if (!response.ok) {
        // Surface the provider's error body so failures are diagnosable.
        const errorText = await response.text();
        throw new Error(`OpenRouter TTS failed (${response.status}): ${errorText}`);
    }
    const audioBytes = new Uint8Array(await response.arrayBuffer());
    return {
        audioBytes,
        contentType: response.headers.get("content-type") ?? undefined,
        generationId: response.headers.get("x-generation-id") ?? undefined
    };
}
|
|
72
|
+
/**
 * Streams synthesized TTS audio as sequential chunks for phone socket pipelines
 * (for example Twilio/Telnyx websocket media send loops).
 *
 * Internally this helper calls `tts()` once and splits the returned audio into
 * chunked payloads that can be forwarded to a live call socket. Chunks are
 * delivered strictly in order; each `onChunk` call is awaited before the next.
 *
 * @param params.llm Optional LLM instance from `getLLM(...)`.
 * @param params.text Text that should be synthesized to audio.
 * @param params.model Voice identifier for speech generation.
 * @param params.responseFormat Audio response format (`mp3` or `pcm`). Default `"mp3"`.
 * @param params.speed Playback speed multiplier (default `1.0`).
 * @param params.chunkSizeBytes Size of each emitted audio chunk in bytes (default `3200`).
 *        Must be positive; the final chunk may be smaller.
 * @param params.emitBase64 If true (default), emits base64 strings; otherwise emits `Uint8Array`.
 * @param params.onChunk Callback receiving each emitted chunk and its zero-based index.
 * @returns Promise with `totalChunks` emitted plus `contentType` / `generationId`
 *          metadata forwarded from the underlying `tts()` call.
 * @throws {Error} If `chunkSizeBytes` is not positive, or if the underlying TTS request fails.
 *
 * @example
 * ```ts
 * await streamTTSOverPhoneSocket({
 *     text: "Willkommen beim Support.",
 *     model: "nova",
 *     onChunk: async (chunk) => socket.send(String(chunk))
 * });
 * ```
 */
export async function streamTTSOverPhoneSocket({ llm = getLLM({ provider: "openrouter", type: "tts" }), speed = 1.0, chunkSizeBytes = 3200, emitBase64 = true, responseFormat = "mp3", text, model, onChunk }) {
    // Guard against a zero or negative chunk size: `offset += chunkSizeBytes`
    // would never advance and the loop below would spin forever.
    if (!(chunkSizeBytes > 0)) {
        throw new Error(`chunkSizeBytes must be a positive number, got: ${chunkSizeBytes}`);
    }
    const result = await tts({
        llm,
        text,
        model,
        responseFormat,
        speed
    });
    let index = 0;
    for (let offset = 0; offset < result.audioBytes.length; offset += chunkSizeBytes) {
        // slice() copies, so downstream consumers cannot mutate the shared buffer.
        const chunk = result.audioBytes.slice(offset, offset + chunkSizeBytes);
        const outgoing = emitBase64 ? Buffer.from(chunk).toString("base64") : chunk;
        // Await each delivery so chunks arrive at the socket in order.
        await onChunk(outgoing, index);
        index += 1;
    }
    return {
        totalChunks: index,
        contentType: result.contentType,
        generationId: result.generationId
    };
}
|
|
142
|
+
//# sourceMappingURL=tts.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"tts.js","sourceRoot":"","sources":["../../../../src/modalities/speech/tts/tts.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAA;AAC9C,OAAO,EAAe,oBAAoB,EAAE,MAAM,kBAAkB,CAAA;AACpE,OAAO,EAAE,gBAAgB,EAAyC,MAAM,WAAW,CAAA;AAgBnF;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAuCG;AACH,MAAM,CAAC,KAAK,UAAU,GAAG,CAAC,EACtB,GAAG,GAAG,MAAM,CAAC,EAAE,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,EACrD,cAAc,GAAG,KAAK,EACtB,KAAK,GAAG,GAAG,EACX,IAAI,EACJ,KAAK,EACI;IACT,MAAM,OAAO,GAAG,oBAAoB,CAAC,GAAG,CAAC,CAAA;IAEzC,MAAM,OAAO,GAAG,gBAAgB,CAAC;QAC7B,KAAK,EAAE,OAAO,CAAC,KAAK;QACpB,KAAK,EAAC,IAAI;QACV,KAAK,EAAC,KAAK;QACX,eAAe,EAAE,cAAc;QAC/B,KAAK;KACR,CAAC,CAAA;IAEF,MAAM,QAAQ,GAAG,MAAM,KAAK,CAAC,GAAG,OAAO,CAAC,OAAO,eAAe,EAAE;QAC5D,MAAM,EAAE,MAAM;QACd,OAAO,EAAE;YACL,aAAa,EAAE,UAAU,OAAO,CAAC,MAAM,EAAE;YACzC,cAAc,EAAE,kBAAkB;SACrC;QACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,OAAO,CAAC;KAChC,CAAC,CAAA;IAEF,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;QACf,MAAM,SAAS,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAA;QACvC,MAAM,IAAI,KAAK,CAAC,0BAA0B,QAAQ,CAAC,MAAM,MAAM,SAAS,EAAE,CAAC,CAAA;IAC/E,CAAC;IAED,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAA;IAChD,OAAO;QACH,UAAU,EAAE,IAAI,UAAU,CAAC,WAAW,CAAC;QACvC,WAAW,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,cAAc,CAAC,IAAI,SAAS;QAC9D,YAAY,EAAE,QAAQ,CAAC,OAAO,CAAC,GAAG,CAAC,iBAAiB,CAAC,IAAI,SAAS;KACrE,CAAA;AACL,CAAC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;GAgDG;AACH,MAAM,CAAC,KAAK,UAAU,wBAAwB,CAAC,EAC3C,GAAG,GAAG,MAAM,CAAC,EAAE,QAAQ,EAAE,YAAY,EAAE,IAAI,EAAE,KAAK,EAAE,CAAC,EACrD,KAAK,GAAG,GAAG,EACX,cAAc,GAAG,IAAI,EACrB,UAAU,GAAG,IAAI,EACjB,cAAc,GAAG,KAAK,EACtB,IAAI,EACJ,KAAK,EACL,OAAO,EACmB;IAC1B,MAAM,MAAM,GAAG,MAAM,GAAG,CAAC;QACrB,GAAG;QACH,IAAI;QACJ,KAAK;QACL,cAAc;QACd,KAAK;KACR,CAAC,CAAA;IAEF,IAAI,KAAK,GAAG,CAAC,CAAA;IACb,KAAK,IAAI,MAAM,GAAG,CAAC,EAAE,MAAM,GAAG,MAAM,CAAC,UAAU,CAAC,MAAM,EAAE,MAAM,IAAI,cAAc,EAAE,CAAC;QAC/E,MAAM,KAAK,GAAG,MAAM,CAAC,UAAU,CAAC,KAAK,CAAC,MAAM,EAAE,MAAM,GAAG,cAAc,CAAC,CAAA;QACtE,MAAM,QAAQ,GAAG,UAAU,CAAC,CAAC,CAAC,MAAM
,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAC,CAAC,CAAC,KAAK,CAAA;QAC3E,MAAM,OAAO,CAAC,QAAQ,EAAE,KAAK,CAAC,CAAA;QAC9B,KAAK,IAAI,CAAC,CAAA;IACd,CAAC;IAED,OAAO;QACH,WAAW,EAAE,KAAK;QAClB,WAAW,EAAE,MAAM,CAAC,WAAW;QAC/B,YAAY,EAAE,MAAM,CAAC,YAAY;KACpC,CAAA;AACL,CAAC"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"helpers.d.ts","sourceRoot":"","sources":["../../../src/modalities/vision/helpers.ts"],"names":[],"mappings":"AAAA,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAA;AAErC,wBAAgB,oBAAoB,CAAC,KAAK,EAAE,gBAAgB,GAAG,MAAM,CAMpE"}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"helpers.js","sourceRoot":"","sources":["../../../src/modalities/vision/helpers.ts"],"names":[],"mappings":"AAEA,MAAM,UAAU,oBAAoB,CAAC,KAAuB;IACxD,IAAI,KAAK,CAAC,UAAU,CAAC,SAAS,CAAC,IAAI,KAAK,CAAC,UAAU,CAAC,UAAU,CAAC,IAAI,KAAK,CAAC,UAAU,CAAC,aAAa,CAAC,EAAE,CAAC;QACjG,OAAO,KAAK,CAAA;IAChB,CAAC;IAED,OAAO,0BAA0B,KAAK,EAAE,CAAA;AAC5C,CAAC"}
|