@orka-js/realtime 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/dist/index.cjs +231 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +178 -0
- package/dist/index.d.ts +178 -0
- package/dist/index.js +227 -0
- package/dist/index.js.map +1 -0
- package/package.json +41 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Orka Team
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/dist/index.cjs
ADDED
|
@@ -0,0 +1,231 @@
|
|
|
1
|
+
'use strict';
|
|
2
|
+
|
|
3
|
+
var agent = require('@orka-js/agent');
|
|
4
|
+
|
|
5
|
+
// src/realtime-agent.ts
|
|
6
|
+
var RealtimeAgent = class {
|
|
7
|
+
config;
|
|
8
|
+
stt;
|
|
9
|
+
tts;
|
|
10
|
+
innerAgent;
|
|
11
|
+
constructor(options) {
|
|
12
|
+
this.config = options.config;
|
|
13
|
+
this.stt = options.stt;
|
|
14
|
+
this.tts = options.tts;
|
|
15
|
+
this.innerAgent = new agent.StreamingToolAgent(
|
|
16
|
+
{
|
|
17
|
+
goal: options.config.goal,
|
|
18
|
+
systemPrompt: options.config.systemPrompt,
|
|
19
|
+
tools: options.tools ?? []
|
|
20
|
+
},
|
|
21
|
+
options.llm
|
|
22
|
+
);
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Process audio: transcribe → run LLM → synthesize.
|
|
26
|
+
* Returns final transcript, response text and audio buffer.
|
|
27
|
+
*/
|
|
28
|
+
async process(audio, audioFormat = "audio/wav") {
|
|
29
|
+
const transcript = await this.stt.transcribe(audio, audioFormat);
|
|
30
|
+
let response = "";
|
|
31
|
+
for await (const event of this.innerAgent.runStream(transcript)) {
|
|
32
|
+
if (event.type === "done") {
|
|
33
|
+
response = event.content;
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
let audioOutput;
|
|
37
|
+
if (this.config.tts !== false && this.tts && response) {
|
|
38
|
+
audioOutput = await this.tts.synthesize(response);
|
|
39
|
+
}
|
|
40
|
+
return { transcript, response, audio: audioOutput };
|
|
41
|
+
}
|
|
42
|
+
/**
|
|
43
|
+
* Process audio as a stream of events — yields transcript, tokens, audio chunks, and done.
|
|
44
|
+
*/
|
|
45
|
+
async *processStream(audio, audioFormat = "audio/wav") {
|
|
46
|
+
let transcript;
|
|
47
|
+
try {
|
|
48
|
+
transcript = await this.stt.transcribe(audio, audioFormat);
|
|
49
|
+
} catch (error) {
|
|
50
|
+
yield { type: "error", error, message: error.message };
|
|
51
|
+
return;
|
|
52
|
+
}
|
|
53
|
+
yield { type: "transcript", text: transcript };
|
|
54
|
+
let fullResponse = "";
|
|
55
|
+
for await (const event of this.innerAgent.runStream(transcript)) {
|
|
56
|
+
switch (event.type) {
|
|
57
|
+
case "token":
|
|
58
|
+
fullResponse += event.token;
|
|
59
|
+
yield { type: "token", content: event.token };
|
|
60
|
+
break;
|
|
61
|
+
case "tool_call":
|
|
62
|
+
yield { type: "tool_call", name: event.name, args: event.arguments };
|
|
63
|
+
break;
|
|
64
|
+
case "tool_result":
|
|
65
|
+
yield { type: "tool_result", name: String(event.toolCallId), result: event.result };
|
|
66
|
+
break;
|
|
67
|
+
case "done":
|
|
68
|
+
fullResponse = event.content || fullResponse;
|
|
69
|
+
break;
|
|
70
|
+
case "error":
|
|
71
|
+
yield { type: "error", error: event.error, message: event.error.message };
|
|
72
|
+
return;
|
|
73
|
+
}
|
|
74
|
+
}
|
|
75
|
+
let audioOutput;
|
|
76
|
+
if (this.config.tts !== false && this.tts && fullResponse) {
|
|
77
|
+
if (this.tts.synthesizeStream) {
|
|
78
|
+
for await (const chunk of this.tts.synthesizeStream(fullResponse)) {
|
|
79
|
+
yield { type: "audio_chunk", data: chunk };
|
|
80
|
+
if (!audioOutput) audioOutput = chunk;
|
|
81
|
+
else audioOutput = Buffer.concat([audioOutput, chunk]);
|
|
82
|
+
}
|
|
83
|
+
} else {
|
|
84
|
+
audioOutput = await this.tts.synthesize(fullResponse);
|
|
85
|
+
yield { type: "audio_chunk", data: audioOutput };
|
|
86
|
+
}
|
|
87
|
+
}
|
|
88
|
+
yield { type: "done", transcript, response: fullResponse, audio: audioOutput };
|
|
89
|
+
}
|
|
90
|
+
/**
|
|
91
|
+
* Returns an Express/Node.js-compatible WebSocket message handler.
|
|
92
|
+
* Expects binary audio messages from the client, sends back JSON events.
|
|
93
|
+
*
|
|
94
|
+
* @example
|
|
95
|
+
* ```typescript
|
|
96
|
+
* import { WebSocketServer } from 'ws';
|
|
97
|
+
* const wss = new WebSocketServer({ port: 8080 });
|
|
98
|
+
* wss.on('connection', agent.wsHandler());
|
|
99
|
+
* ```
|
|
100
|
+
*/
|
|
101
|
+
wsHandler() {
|
|
102
|
+
return (ws) => {
|
|
103
|
+
ws.on("message", async (data) => {
|
|
104
|
+
const audio = data instanceof Buffer ? data : data instanceof ArrayBuffer ? Buffer.from(data) : Buffer.from(data);
|
|
105
|
+
try {
|
|
106
|
+
for await (const event of this.processStream(audio)) {
|
|
107
|
+
ws.send(JSON.stringify(event));
|
|
108
|
+
}
|
|
109
|
+
} catch (error) {
|
|
110
|
+
ws.send(JSON.stringify({
|
|
111
|
+
type: "error",
|
|
112
|
+
error: { message: error.message },
|
|
113
|
+
message: error.message
|
|
114
|
+
}));
|
|
115
|
+
}
|
|
116
|
+
});
|
|
117
|
+
};
|
|
118
|
+
}
|
|
119
|
+
};
|
|
120
|
+
|
|
121
|
+
// src/adapters/openai-stt.ts
|
|
122
|
+
var OpenAISTTAdapter = class {
|
|
123
|
+
apiKey;
|
|
124
|
+
model;
|
|
125
|
+
baseURL;
|
|
126
|
+
timeoutMs;
|
|
127
|
+
constructor(config) {
|
|
128
|
+
this.apiKey = config.apiKey;
|
|
129
|
+
this.model = config.model ?? "whisper-1";
|
|
130
|
+
this.baseURL = config.baseURL ?? "https://api.openai.com/v1";
|
|
131
|
+
this.timeoutMs = config.timeoutMs ?? 12e4;
|
|
132
|
+
}
|
|
133
|
+
async transcribe(audio, format = "audio/wav") {
|
|
134
|
+
const controller = new AbortController();
|
|
135
|
+
const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
|
|
136
|
+
const ext = format.includes("webm") ? "webm" : format.includes("mp3") ? "mp3" : format.includes("ogg") ? "ogg" : "wav";
|
|
137
|
+
const audioData = audio instanceof Buffer ? audio.buffer.slice(audio.byteOffset, audio.byteOffset + audio.byteLength) : audio;
|
|
138
|
+
const blob = new Blob([audioData], { type: format });
|
|
139
|
+
const formData = new FormData();
|
|
140
|
+
formData.append("file", blob, `audio.${ext}`);
|
|
141
|
+
formData.append("model", this.model);
|
|
142
|
+
formData.append("response_format", "text");
|
|
143
|
+
let response;
|
|
144
|
+
try {
|
|
145
|
+
response = await fetch(`${this.baseURL}/audio/transcriptions`, {
|
|
146
|
+
method: "POST",
|
|
147
|
+
signal: controller.signal,
|
|
148
|
+
headers: { "Authorization": `Bearer ${this.apiKey}` },
|
|
149
|
+
body: formData
|
|
150
|
+
});
|
|
151
|
+
} catch (error) {
|
|
152
|
+
clearTimeout(timeout);
|
|
153
|
+
if (error.name === "AbortError") {
|
|
154
|
+
throw new Error(`Whisper API request timed out after ${this.timeoutMs}ms`);
|
|
155
|
+
}
|
|
156
|
+
throw error;
|
|
157
|
+
} finally {
|
|
158
|
+
clearTimeout(timeout);
|
|
159
|
+
}
|
|
160
|
+
if (!response.ok) {
|
|
161
|
+
const err = await response.text();
|
|
162
|
+
throw new Error(`Whisper API error: ${response.status} - ${err}`);
|
|
163
|
+
}
|
|
164
|
+
return response.text();
|
|
165
|
+
}
|
|
166
|
+
};
|
|
167
|
+
|
|
168
|
+
// src/adapters/openai-tts.ts
|
|
169
|
+
var OpenAITTSAdapter = class {
|
|
170
|
+
apiKey;
|
|
171
|
+
model;
|
|
172
|
+
defaultVoice;
|
|
173
|
+
baseURL;
|
|
174
|
+
timeoutMs;
|
|
175
|
+
constructor(config) {
|
|
176
|
+
this.apiKey = config.apiKey;
|
|
177
|
+
this.model = config.model ?? "tts-1";
|
|
178
|
+
this.defaultVoice = config.voice ?? "alloy";
|
|
179
|
+
this.baseURL = config.baseURL ?? "https://api.openai.com/v1";
|
|
180
|
+
this.timeoutMs = config.timeoutMs ?? 6e4;
|
|
181
|
+
}
|
|
182
|
+
async synthesize(text, options = {}) {
|
|
183
|
+
const controller = new AbortController();
|
|
184
|
+
const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
|
|
185
|
+
let response;
|
|
186
|
+
try {
|
|
187
|
+
response = await fetch(`${this.baseURL}/audio/speech`, {
|
|
188
|
+
method: "POST",
|
|
189
|
+
signal: controller.signal,
|
|
190
|
+
headers: {
|
|
191
|
+
"Content-Type": "application/json",
|
|
192
|
+
"Authorization": `Bearer ${this.apiKey}`
|
|
193
|
+
},
|
|
194
|
+
body: JSON.stringify({
|
|
195
|
+
model: this.model,
|
|
196
|
+
input: text,
|
|
197
|
+
voice: options.voice ?? this.defaultVoice,
|
|
198
|
+
response_format: options.format ?? "mp3",
|
|
199
|
+
speed: options.speed ?? 1
|
|
200
|
+
})
|
|
201
|
+
});
|
|
202
|
+
} catch (error) {
|
|
203
|
+
clearTimeout(timeout);
|
|
204
|
+
if (error.name === "AbortError") {
|
|
205
|
+
throw new Error(`TTS API request timed out after ${this.timeoutMs}ms`);
|
|
206
|
+
}
|
|
207
|
+
throw error;
|
|
208
|
+
} finally {
|
|
209
|
+
clearTimeout(timeout);
|
|
210
|
+
}
|
|
211
|
+
if (!response.ok) {
|
|
212
|
+
const err = await response.text();
|
|
213
|
+
throw new Error(`TTS API error: ${response.status} - ${err}`);
|
|
214
|
+
}
|
|
215
|
+
const arrayBuffer = await response.arrayBuffer();
|
|
216
|
+
return Buffer.from(arrayBuffer);
|
|
217
|
+
}
|
|
218
|
+
async *synthesizeStream(text, options = {}) {
|
|
219
|
+
const sentences = text.match(/[^.!?]+[.!?]+/g) ?? [text];
|
|
220
|
+
for (const sentence of sentences) {
|
|
221
|
+
const audio = await this.synthesize(sentence.trim(), options);
|
|
222
|
+
yield audio;
|
|
223
|
+
}
|
|
224
|
+
}
|
|
225
|
+
};
|
|
226
|
+
|
|
227
|
+
exports.OpenAISTTAdapter = OpenAISTTAdapter;
|
|
228
|
+
exports.OpenAITTSAdapter = OpenAITTSAdapter;
|
|
229
|
+
exports.RealtimeAgent = RealtimeAgent;
|
|
230
|
+
//# sourceMappingURL=index.cjs.map
|
|
231
|
+
//# sourceMappingURL=index.cjs.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/realtime-agent.ts","../src/adapters/openai-stt.ts","../src/adapters/openai-tts.ts"],"names":["StreamingToolAgent"],"mappings":";;;;;AAoCO,IAAM,gBAAN,MAAoB;AAAA,EACjB,MAAA;AAAA,EACA,GAAA;AAAA,EACA,GAAA;AAAA,EACA,UAAA;AAAA,EAER,YAAY,OAAA,EAA+B;AACzC,IAAA,IAAA,CAAK,SAAS,OAAA,CAAQ,MAAA;AACtB,IAAA,IAAA,CAAK,MAAM,OAAA,CAAQ,GAAA;AACnB,IAAA,IAAA,CAAK,MAAM,OAAA,CAAQ,GAAA;AAEnB,IAAA,IAAA,CAAK,aAAa,IAAIA,wBAAA;AAAA,MACpB;AAAA,QACE,IAAA,EAAM,QAAQ,MAAA,CAAO,IAAA;AAAA,QACrB,YAAA,EAAc,QAAQ,MAAA,CAAO,YAAA;AAAA,QAC7B,KAAA,EAAO,OAAA,CAAQ,KAAA,IAAS;AAAC,OAC3B;AAAA,MACA,OAAA,CAAQ;AAAA,KACV;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,OAAA,CAAQ,KAAA,EAA6B,WAAA,GAAc,WAAA,EAA6C;AACpG,IAAA,MAAM,aAAa,MAAM,IAAA,CAAK,GAAA,CAAI,UAAA,CAAW,OAAO,WAAW,CAAA;AAE/D,IAAA,IAAI,QAAA,GAAW,EAAA;AACf,IAAA,WAAA,MAAiB,KAAA,IAAS,IAAA,CAAK,UAAA,CAAW,SAAA,CAAU,UAAU,CAAA,EAAG;AAC/D,MAAA,IAAI,KAAA,CAAM,SAAS,MAAA,EAAQ;AACzB,QAAA,QAAA,GAAW,KAAA,CAAM,OAAA;AAAA,MACnB;AAAA,IACF;AAEA,IAAA,IAAI,WAAA;AACJ,IAAA,IAAI,KAAK,MAAA,CAAO,GAAA,KAAQ,KAAA,IAAS,IAAA,CAAK,OAAO,QAAA,EAAU;AACrD,MAAA,WAAA,GAAc,MAAM,IAAA,CAAK,GAAA,CAAI,UAAA,CAAW,QAAQ,CAAA;AAAA,IAClD;AAEA,IAAA,OAAO,EAAE,UAAA,EAAY,QAAA,EAAU,KAAA,EAAO,WAAA,EAAY;AAAA,EACpD;AAAA;AAAA;AAAA;AAAA,EAKA,OAAO,aAAA,CACL,KAAA,EACA,WAAA,GAAc,WAAA,EACgB;AAC9B,IAAA,IAAI,UAAA;AACJ,IAAA,IAAI;AACF,MAAA,UAAA,GAAa,MAAM,IAAA,CAAK,GAAA,CAAI,UAAA,CAAW,OAAO,WAAW,CAAA;AAAA,IAC3D,SAAS,KAAA,EAAO;AACd,MAAA,MAAM,EAAE,IAAA,EAAM,OAAA,EAAS,KAAA,EAAuB,OAAA,EAAU,MAAgB,OAAA,EAAQ;AAChF,MAAA;AAAA,IACF;AAEA,IAAA,MAAM,EAAE,IAAA,EAAM,YAAA,EAAc,IAAA,EAAM,UAAA,EAAW;AAE7C,IAAA,IAAI,YAAA,GAAe,EAAA;AAEnB,IAAA,WAAA,MAAiB,KAAA,IAAS,IAAA,CAAK,UAAA,CAAW,SAAA,CAAU,UAAU,CAAA,EAAG;AAC/D,MAAA,QAAQ,MAAM,IAAA;AAAM,QAClB,KAAK,OAAA;AACH,UAAA,YAAA,IAAgB,KAAA,CAAM,KAAA;AACtB,UAAA,MAAM,EAAE,IAAA,EAAM,OAAA,EAAS,OAAA,EAAS,MAAM,KAAA,EAAM;AAC5C,UAAA;AAAA,QACF,KAAK,WAAA;AACH,UAAA,MAAM,EAAE,MAAM,WAAA,EAAa,IAAA,EAAM,MAAM,IAAA,EAAM,IAAA,EAAM,MAAM,SAAA,EAAU;AACnE,UAAA;AAAA,QACF,KAAK,aAAA;AACH,UAAA,MAAM,EAAE,
IAAA,EAAM,aAAA,EAAe,IAAA,EAAM,MAAA,CAAO,MAAM,UAAU,CAAA,EAAG,MAAA,EAAQ,KAAA,CAAM,MAAA,EAAO;AAClF,UAAA;AAAA,QACF,KAAK,MAAA;AACH,UAAA,YAAA,GAAe,MAAM,OAAA,IAAW,YAAA;AAChC,UAAA;AAAA,QACF,KAAK,OAAA;AACH,UAAA,MAAM,EAAE,MAAM,OAAA,EAAS,KAAA,EAAO,MAAM,KAAA,EAAO,OAAA,EAAS,KAAA,CAAM,KAAA,CAAM,OAAA,EAAQ;AACxE,UAAA;AAAA;AACJ,IACF;AAGA,IAAA,IAAI,WAAA;AACJ,IAAA,IAAI,KAAK,MAAA,CAAO,GAAA,KAAQ,KAAA,IAAS,IAAA,CAAK,OAAO,YAAA,EAAc;AACzD,MAAA,IAAI,IAAA,CAAK,IAAI,gBAAA,EAAkB;AAC7B,QAAA,WAAA,MAAiB,KAAA,IAAS,IAAA,CAAK,GAAA,CAAI,gBAAA,CAAiB,YAAY,CAAA,EAAG;AACjE,UAAA,MAAM,EAAE,IAAA,EAAM,aAAA,EAAe,IAAA,EAAM,KAAA,EAAM;AACzC,UAAA,IAAI,CAAC,aAAa,WAAA,GAAc,KAAA;AAAA,6BACb,MAAA,CAAO,MAAA,CAAO,CAAC,WAAA,EAAa,KAAK,CAAC,CAAA;AAAA,QACvD;AAAA,MACF,CAAA,MAAO;AACL,QAAA,WAAA,GAAc,MAAM,IAAA,CAAK,GAAA,CAAI,UAAA,CAAW,YAAY,CAAA;AACpD,QAAA,MAAM,EAAE,IAAA,EAAM,aAAA,EAAe,IAAA,EAAM,WAAA,EAAY;AAAA,MACjD;AAAA,IACF;AAEA,IAAA,MAAM,EAAE,IAAA,EAAM,MAAA,EAAQ,YAAY,QAAA,EAAU,YAAA,EAAc,OAAO,WAAA,EAAY;AAAA,EAC/E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAaA,SAAA,GAAY;AACV,IAAA,OAAO,CAAC,EAAA,KAGF;AACJ,MAAA,EAAA,CAAG,EAAA,CAAG,SAAA,EAAW,OAAO,IAAA,KAAkB;AACxC,QAAA,MAAM,KAAA,GAAQ,IAAA,YAAgB,MAAA,GAAS,IAAA,GACnC,IAAA,YAAgB,WAAA,GAAc,MAAA,CAAO,IAAA,CAAK,IAAI,CAAA,GAC9C,MAAA,CAAO,IAAA,CAAK,IAAkB,CAAA;AAElC,QAAA,IAAI;AACF,UAAA,WAAA,MAAiB,KAAA,IAAS,IAAA,CAAK,aAAA,CAAc,KAAK,CAAA,EAAG;AACnD,YAAA,EAAA,CAAG,IAAA,CAAK,IAAA,CAAK,SAAA,CAAU,KAAK,CAAC,CAAA;AAAA,UAC/B;AAAA,QACF,SAAS,KAAA,EAAO;AACd,UAAA,EAAA,CAAG,IAAA,CAAK,KAAK,SAAA,CAAU;AAAA,YACrB,IAAA,EAAM,OAAA;AAAA,YACN,KAAA,EAAO,EAAE,OAAA,EAAU,KAAA,CAAgB,OAAA,EAAQ;AAAA,YAC3C,SAAU,KAAA,CAAgB;AAAA,WAC3B,CAAC,CAAA;AAAA,QACJ;AAAA,MACF,CAAC,CAAA;AAAA,IACH,CAAA;AAAA,EACF;AACF;;;AChKO,IAAM,mBAAN,MAA6C;AAAA,EAC1C,MAAA;AAAA,EACA,KAAA;AAAA,EACA,OAAA;AAAA,EACA,SAAA;AAAA,EAER,YAAY,MAAA,EAAyB;AACnC,IAAA,IAAA,CAAK,SAAS,MAAA,CAAO,MAAA;AACrB,IAAA,IAAA,CAAK,KAAA,GAAQ,OAAO,KAAA,IAAS,WAAA;AAC7B,IAAA,IAAA,CAAK,OAAA,GAAU,OAAO,OAAA,IAAW,2BAAA;AACjC,IAAA,IAAA,CAAK,SAAA,GAAY,OAAO,SAAA,IAAa,IAAA;
AAAA,EACvC;AAAA,EAEA,MAAM,UAAA,CAAW,KAAA,EAA6B,MAAA,GAAS,WAAA,EAA8B;AACnF,IAAA,MAAM,UAAA,GAAa,IAAI,eAAA,EAAgB;AACvC,IAAA,MAAM,UAAU,UAAA,CAAW,MAAM,WAAW,KAAA,EAAM,EAAG,KAAK,SAAS,CAAA;AAEnE,IAAA,MAAM,GAAA,GAAM,MAAA,CAAO,QAAA,CAAS,MAAM,IAAI,MAAA,GAClC,MAAA,CAAO,QAAA,CAAS,KAAK,IAAI,KAAA,GACzB,MAAA,CAAO,QAAA,CAAS,KAAK,IAAI,KAAA,GACzB,KAAA;AAEJ,IAAA,MAAM,SAAA,GAAY,KAAA,YAAiB,MAAA,GAAS,KAAA,CAAM,MAAA,CAAO,KAAA,CAAM,KAAA,CAAM,UAAA,EAAY,KAAA,CAAM,UAAA,GAAa,KAAA,CAAM,UAAU,CAAA,GAAI,KAAA;AACxH,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,SAAqB,CAAA,EAAG,EAAE,IAAA,EAAM,MAAA,EAAQ,CAAA;AAC/D,IAAA,MAAM,QAAA,GAAW,IAAI,QAAA,EAAS;AAC9B,IAAA,QAAA,CAAS,MAAA,CAAO,MAAA,EAAQ,IAAA,EAAM,CAAA,MAAA,EAAS,GAAG,CAAA,CAAE,CAAA;AAC5C,IAAA,QAAA,CAAS,MAAA,CAAO,OAAA,EAAS,IAAA,CAAK,KAAK,CAAA;AACnC,IAAA,QAAA,CAAS,MAAA,CAAO,mBAAmB,MAAM,CAAA;AAEzC,IAAA,IAAI,QAAA;AACJ,IAAA,IAAI;AACF,MAAA,QAAA,GAAW,MAAM,KAAA,CAAM,CAAA,EAAG,IAAA,CAAK,OAAO,CAAA,qBAAA,CAAA,EAAyB;AAAA,QAC7D,MAAA,EAAQ,MAAA;AAAA,QACR,QAAQ,UAAA,CAAW,MAAA;AAAA,QACnB,SAAS,EAAE,eAAA,EAAiB,CAAA,OAAA,EAAU,IAAA,CAAK,MAAM,CAAA,CAAA,EAAG;AAAA,QACpD,IAAA,EAAM;AAAA,OACP,CAAA;AAAA,IACH,SAAS,KAAA,EAAO;AACd,MAAA,YAAA,CAAa,OAAO,CAAA;AACpB,MAAA,IAAK,KAAA,CAAgB,SAAS,YAAA,EAAc;AAC1C,QAAA,MAAM,IAAI,KAAA,CAAM,CAAA,oCAAA,EAAuC,IAAA,CAAK,SAAS,CAAA,EAAA,CAAI,CAAA;AAAA,MAC3E;AACA,MAAA,MAAM,KAAA;AAAA,IACR,CAAA,SAAE;AACA,MAAA,YAAA,CAAa,OAAO,CAAA;AAAA,IACtB;AAEA,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,MAAM,GAAA,GAAM,MAAM,QAAA,CAAS,IAAA,EAAK;AAChC,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,mBAAA,EAAsB,SAAS,MAAM,CAAA,GAAA,EAAM,GAAG,CAAA,CAAE,CAAA;AAAA,IAClE;AAEA,IAAA,OAAO,SAAS,IAAA,EAAK;AAAA,EACvB;AACF;;;ACrDO,IAAM,mBAAN,MAA6C;AAAA,EAC1C,MAAA;AAAA,EACA,KAAA;AAAA,EACA,YAAA;AAAA,EACA,OAAA;AAAA,EACA,SAAA;AAAA,EAER,YAAY,MAAA,EAAyB;AACnC,IAAA,IAAA,CAAK,SAAS,MAAA,CAAO,MAAA;AACrB,IAAA,IAAA,CAAK,KAAA,GAAQ,OAAO,KAAA,IAAS,OAAA;AAC7B,IAAA,IAAA,CAAK,YAAA,GAAe,OAAO,KAAA,IAAS,OAAA;AACpC,IAAA,IAAA,CAAK,OAAA,GAAU,OAAO,OAAA,IAAW,2BAAA;AACjC,IAAA,IAAA,CAAK,SAAA,GAAY,OAAO,SAAA,IAAa,GAAA;AAAA,EACvC;AAAA,EAEA,MA
AM,UAAA,CAAW,IAAA,EAAc,OAAA,GAAgC,EAAC,EAAoB;AAClF,IAAA,MAAM,UAAA,GAAa,IAAI,eAAA,EAAgB;AACvC,IAAA,MAAM,UAAU,UAAA,CAAW,MAAM,WAAW,KAAA,EAAM,EAAG,KAAK,SAAS,CAAA;AAEnE,IAAA,IAAI,QAAA;AACJ,IAAA,IAAI;AACF,MAAA,QAAA,GAAW,MAAM,KAAA,CAAM,CAAA,EAAG,IAAA,CAAK,OAAO,CAAA,aAAA,CAAA,EAAiB;AAAA,QACrD,MAAA,EAAQ,MAAA;AAAA,QACR,QAAQ,UAAA,CAAW,MAAA;AAAA,QACnB,OAAA,EAAS;AAAA,UACP,cAAA,EAAgB,kBAAA;AAAA,UAChB,eAAA,EAAiB,CAAA,OAAA,EAAU,IAAA,CAAK,MAAM,CAAA;AAAA,SACxC;AAAA,QACA,IAAA,EAAM,KAAK,SAAA,CAAU;AAAA,UACnB,OAAO,IAAA,CAAK,KAAA;AAAA,UACZ,KAAA,EAAO,IAAA;AAAA,UACP,KAAA,EAAO,OAAA,CAAQ,KAAA,IAAS,IAAA,CAAK,YAAA;AAAA,UAC7B,eAAA,EAAiB,QAAQ,MAAA,IAAU,KAAA;AAAA,UACnC,KAAA,EAAO,QAAQ,KAAA,IAAS;AAAA,SACzB;AAAA,OACF,CAAA;AAAA,IACH,SAAS,KAAA,EAAO;AACd,MAAA,YAAA,CAAa,OAAO,CAAA;AACpB,MAAA,IAAK,KAAA,CAAgB,SAAS,YAAA,EAAc;AAC1C,QAAA,MAAM,IAAI,KAAA,CAAM,CAAA,gCAAA,EAAmC,IAAA,CAAK,SAAS,CAAA,EAAA,CAAI,CAAA;AAAA,MACvE;AACA,MAAA,MAAM,KAAA;AAAA,IACR,CAAA,SAAE;AACA,MAAA,YAAA,CAAa,OAAO,CAAA;AAAA,IACtB;AAEA,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,MAAM,GAAA,GAAM,MAAM,QAAA,CAAS,IAAA,EAAK;AAChC,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,eAAA,EAAkB,SAAS,MAAM,CAAA,GAAA,EAAM,GAAG,CAAA,CAAE,CAAA;AAAA,IAC9D;AAEA,IAAA,MAAM,WAAA,GAAc,MAAM,QAAA,CAAS,WAAA,EAAY;AAC/C,IAAA,OAAO,MAAA,CAAO,KAAK,WAAW,CAAA;AAAA,EAChC;AAAA,EAEA,OAAO,gBAAA,CAAiB,IAAA,EAAc,OAAA,GAAgC,EAAC,EAA0B;AAE/F,IAAA,MAAM,YAAY,IAAA,CAAK,KAAA,CAAM,gBAAgB,CAAA,IAAK,CAAC,IAAI,CAAA;AACvD,IAAA,KAAA,MAAW,YAAY,SAAA,EAAW;AAChC,MAAA,MAAM,QAAQ,MAAM,IAAA,CAAK,WAAW,QAAA,CAAS,IAAA,IAAQ,OAAO,CAAA;AAC5D,MAAA,MAAM,KAAA;AAAA,IACR;AAAA,EACF;AACF","file":"index.cjs","sourcesContent":["import type { LLMAdapter } from '@orka-js/core';\nimport { StreamingToolAgent } from '@orka-js/agent';\nimport type { Tool } from '@orka-js/agent';\nimport type {\n STTAdapter,\n TTSAdapter,\n RealtimeAgentConfig,\n RealtimeEvent,\n RealtimeProcessResult,\n} from './types.js';\n\nexport interface RealtimeAgentOptions {\n config: RealtimeAgentConfig;\n llm: LLMAdapter;\n stt: STTAdapter;\n tts?: TTSAdapter;\n 
tools?: Tool[];\n}\n\n/**\n * A voice agent that processes audio through the STT → LLM → TTS pipeline.\n *\n * @example\n * ```typescript\n * const agent = new RealtimeAgent({\n * config: { goal: 'Answer questions', tts: true },\n * llm: new OpenAIAdapter({ apiKey }),\n * stt: new OpenAISTTAdapter({ apiKey }),\n * tts: new OpenAITTSAdapter({ apiKey }),\n * });\n *\n * const result = await agent.process(audioBuffer);\n * console.log(result.transcript); // what the user said\n * console.log(result.response); // the LLM's reply\n * ```\n */\nexport class RealtimeAgent {\n private config: RealtimeAgentConfig;\n private stt: STTAdapter;\n private tts?: TTSAdapter;\n private innerAgent: StreamingToolAgent;\n\n constructor(options: RealtimeAgentOptions) {\n this.config = options.config;\n this.stt = options.stt;\n this.tts = options.tts;\n\n this.innerAgent = new StreamingToolAgent(\n {\n goal: options.config.goal,\n systemPrompt: options.config.systemPrompt,\n tools: options.tools ?? [],\n },\n options.llm,\n );\n }\n\n /**\n * Process audio: transcribe → run LLM → synthesize.\n * Returns final transcript, response text and audio buffer.\n */\n async process(audio: Buffer | ArrayBuffer, audioFormat = 'audio/wav'): Promise<RealtimeProcessResult> {\n const transcript = await this.stt.transcribe(audio, audioFormat);\n\n let response = '';\n for await (const event of this.innerAgent.runStream(transcript)) {\n if (event.type === 'done') {\n response = event.content;\n }\n }\n\n let audioOutput: Buffer | undefined;\n if (this.config.tts !== false && this.tts && response) {\n audioOutput = await this.tts.synthesize(response);\n }\n\n return { transcript, response, audio: audioOutput };\n }\n\n /**\n * Process audio as a stream of events — yields transcript, tokens, audio chunks, and done.\n */\n async *processStream(\n audio: Buffer | ArrayBuffer,\n audioFormat = 'audio/wav',\n ): AsyncIterable<RealtimeEvent> {\n let transcript: string;\n try {\n transcript = await 
this.stt.transcribe(audio, audioFormat);\n } catch (error) {\n yield { type: 'error', error: error as Error, message: (error as Error).message };\n return;\n }\n\n yield { type: 'transcript', text: transcript };\n\n let fullResponse = '';\n\n for await (const event of this.innerAgent.runStream(transcript)) {\n switch (event.type) {\n case 'token':\n fullResponse += event.token;\n yield { type: 'token', content: event.token };\n break;\n case 'tool_call':\n yield { type: 'tool_call', name: event.name, args: event.arguments };\n break;\n case 'tool_result':\n yield { type: 'tool_result', name: String(event.toolCallId), result: event.result };\n break;\n case 'done':\n fullResponse = event.content || fullResponse;\n break;\n case 'error':\n yield { type: 'error', error: event.error, message: event.error.message };\n return;\n }\n }\n\n // Synthesize audio\n let audioOutput: Buffer | undefined;\n if (this.config.tts !== false && this.tts && fullResponse) {\n if (this.tts.synthesizeStream) {\n for await (const chunk of this.tts.synthesizeStream(fullResponse)) {\n yield { type: 'audio_chunk', data: chunk };\n if (!audioOutput) audioOutput = chunk;\n else audioOutput = Buffer.concat([audioOutput, chunk]);\n }\n } else {\n audioOutput = await this.tts.synthesize(fullResponse);\n yield { type: 'audio_chunk', data: audioOutput };\n }\n }\n\n yield { type: 'done', transcript, response: fullResponse, audio: audioOutput };\n }\n\n /**\n * Returns an Express/Node.js-compatible WebSocket message handler.\n * Expects binary audio messages from the client, sends back JSON events.\n *\n * @example\n * ```typescript\n * import { WebSocketServer } from 'ws';\n * const wss = new WebSocketServer({ port: 8080 });\n * wss.on('connection', agent.wsHandler());\n * ```\n */\n wsHandler() {\n return (ws: {\n on(event: string, listener: (...args: unknown[]) => void): void;\n send(data: string): void;\n }) => {\n ws.on('message', async (data: unknown) => {\n const audio = data instanceof Buffer 
? data\n : data instanceof ArrayBuffer ? Buffer.from(data)\n : Buffer.from(data as Uint8Array);\n\n try {\n for await (const event of this.processStream(audio)) {\n ws.send(JSON.stringify(event));\n }\n } catch (error) {\n ws.send(JSON.stringify({\n type: 'error',\n error: { message: (error as Error).message },\n message: (error as Error).message,\n }));\n }\n });\n };\n }\n}\n","import type { STTAdapter } from '../types.js';\n\nexport interface OpenAISTTConfig {\n apiKey: string;\n model?: string;\n baseURL?: string;\n timeoutMs?: number;\n}\n\n/**\n * STT adapter using OpenAI Whisper API.\n */\nexport class OpenAISTTAdapter implements STTAdapter {\n private apiKey: string;\n private model: string;\n private baseURL: string;\n private timeoutMs: number;\n\n constructor(config: OpenAISTTConfig) {\n this.apiKey = config.apiKey;\n this.model = config.model ?? 'whisper-1';\n this.baseURL = config.baseURL ?? 'https://api.openai.com/v1';\n this.timeoutMs = config.timeoutMs ?? 120_000;\n }\n\n async transcribe(audio: Buffer | ArrayBuffer, format = 'audio/wav'): Promise<string> {\n const controller = new AbortController();\n const timeout = setTimeout(() => controller.abort(), this.timeoutMs);\n\n const ext = format.includes('webm') ? 'webm'\n : format.includes('mp3') ? 'mp3'\n : format.includes('ogg') ? 'ogg'\n : 'wav';\n\n const audioData = audio instanceof Buffer ? 
audio.buffer.slice(audio.byteOffset, audio.byteOffset + audio.byteLength) : audio;\n const blob = new Blob([audioData as BlobPart], { type: format });\n const formData = new FormData();\n formData.append('file', blob, `audio.${ext}`);\n formData.append('model', this.model);\n formData.append('response_format', 'text');\n\n let response: Response;\n try {\n response = await fetch(`${this.baseURL}/audio/transcriptions`, {\n method: 'POST',\n signal: controller.signal,\n headers: { 'Authorization': `Bearer ${this.apiKey}` },\n body: formData,\n });\n } catch (error) {\n clearTimeout(timeout);\n if ((error as Error).name === 'AbortError') {\n throw new Error(`Whisper API request timed out after ${this.timeoutMs}ms`);\n }\n throw error;\n } finally {\n clearTimeout(timeout);\n }\n\n if (!response.ok) {\n const err = await response.text();\n throw new Error(`Whisper API error: ${response.status} - ${err}`);\n }\n\n return response.text();\n }\n}\n","import type { TTSAdapter, TTSSynthesizeOptions } from '../types.js';\n\nexport interface OpenAITTSConfig {\n apiKey: string;\n model?: string;\n voice?: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';\n baseURL?: string;\n timeoutMs?: number;\n}\n\n/**\n * TTS adapter using OpenAI TTS API.\n */\nexport class OpenAITTSAdapter implements TTSAdapter {\n private apiKey: string;\n private model: string;\n private defaultVoice: string;\n private baseURL: string;\n private timeoutMs: number;\n\n constructor(config: OpenAITTSConfig) {\n this.apiKey = config.apiKey;\n this.model = config.model ?? 'tts-1';\n this.defaultVoice = config.voice ?? 'alloy';\n this.baseURL = config.baseURL ?? 'https://api.openai.com/v1';\n this.timeoutMs = config.timeoutMs ?? 
60_000;\n }\n\n async synthesize(text: string, options: TTSSynthesizeOptions = {}): Promise<Buffer> {\n const controller = new AbortController();\n const timeout = setTimeout(() => controller.abort(), this.timeoutMs);\n\n let response: Response;\n try {\n response = await fetch(`${this.baseURL}/audio/speech`, {\n method: 'POST',\n signal: controller.signal,\n headers: {\n 'Content-Type': 'application/json',\n 'Authorization': `Bearer ${this.apiKey}`,\n },\n body: JSON.stringify({\n model: this.model,\n input: text,\n voice: options.voice ?? this.defaultVoice,\n response_format: options.format ?? 'mp3',\n speed: options.speed ?? 1.0,\n }),\n });\n } catch (error) {\n clearTimeout(timeout);\n if ((error as Error).name === 'AbortError') {\n throw new Error(`TTS API request timed out after ${this.timeoutMs}ms`);\n }\n throw error;\n } finally {\n clearTimeout(timeout);\n }\n\n if (!response.ok) {\n const err = await response.text();\n throw new Error(`TTS API error: ${response.status} - ${err}`);\n }\n\n const arrayBuffer = await response.arrayBuffer();\n return Buffer.from(arrayBuffer);\n }\n\n async *synthesizeStream(text: string, options: TTSSynthesizeOptions = {}): AsyncIterable<Buffer> {\n // Split text into sentences for lower latency\n const sentences = text.match(/[^.!?]+[.!?]+/g) ?? [text];\n for (const sentence of sentences) {\n const audio = await this.synthesize(sentence.trim(), options);\n yield audio;\n }\n }\n}\n"]}
|
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
import { LLMAdapter } from '@orka-js/core';
|
|
2
|
+
import { Tool } from '@orka-js/agent';
|
|
3
|
+
|
|
4
|
+
/**
 * Speech-to-text contract: turns recorded audio into its text transcript.
 */
interface STTAdapter {
    /**
     * Produce the transcript for the supplied audio.
     *
     * @param audio Raw audio bytes.
     * @param format MIME type of the audio, e.g. 'audio/webm', 'audio/wav' or 'audio/mp3'.
     * @returns Promise resolving to the transcribed text.
     */
    transcribe(audio: Buffer | ArrayBuffer, format?: string): Promise<string>;
}
|
|
15
|
+
/**
 * Text-to-speech contract: turns text into audio bytes.
 */
interface TTSAdapter {
    /**
     * Render the whole text as a single audio buffer.
     *
     * @param text Text to speak.
     * @param options Optional voice, speed and output format.
     */
    synthesize(text: string, options?: TTSSynthesizeOptions): Promise<Buffer>;
    /**
     * Optional streaming variant: render the text as a sequence of audio chunks.
     *
     * @param text Text to speak.
     * @param options Optional voice, speed and output format.
     */
    synthesizeStream?(text: string, options?: TTSSynthesizeOptions): AsyncIterable<Buffer>;
}
|
|
28
|
+
/**
 * Per-call settings accepted by a TTS adapter; every field is optional.
 */
interface TTSSynthesizeOptions {
    /** Voice identifier to use for this call. */
    voice?: string;
    /** Playback speed. */
    speed?: number;
    /** Output format: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' */
    format?: string;
}
|
|
34
|
+
/**
 * Discriminated union (keyed on `type`) of everything RealtimeAgent can emit
 * while handling one voice interaction.
 */
type RealtimeEvent =
    /** The user's speech, transcribed. */
    | {
        type: 'transcript';
        text: string;
    }
    /** One piece of the LLM response text. */
    | {
        type: 'token';
        content: string;
    }
    /** The LLM requested a tool invocation. */
    | {
        type: 'tool_call';
        name: string;
        args: unknown;
    }
    /** A tool finished; RealtimeAgent fills `name` with the stringified tool call id. */
    | {
        type: 'tool_result';
        name: string;
        result: unknown;
    }
    /** A piece of synthesized audio. */
    | {
        type: 'audio_chunk';
        data: Buffer;
    }
    /** Final summary of the interaction. */
    | {
        type: 'done';
        transcript: string;
        response: string;
        audio?: Buffer;
    }
    /** The interaction failed; `message` is the error's message. */
    | {
        type: 'error';
        error: Error;
        message: string;
    };
|
|
64
|
+
/**
 * Behavioural settings for a RealtimeAgent.
 */
interface RealtimeAgentConfig {
    /** System goal / personality for the voice agent; passed to the inner LLM agent. */
    goal: string;
    /** Custom system prompt passed to the inner LLM agent. */
    systemPrompt?: string;
    /**
     * Language for STT (ISO-639-1, e.g. 'en', 'fr').
     * NOTE(review): the RealtimeAgent implementation shipped in this version
     * does not appear to read this field — confirm before relying on it.
     */
    language?: string;
    /**
     * Whether to synthesize audio output. Only an explicit `false` turns
     * synthesis off; a TTS adapter must also be supplied for audio to be produced.
     */
    tts?: boolean;
}
|
|
74
|
+
/**
 * Outcome of a single RealtimeAgent.process() call.
 */
interface RealtimeProcessResult {
    /** What the user said, as transcribed by the STT adapter. */
    transcript: string;
    /** The LLM's text reply. */
    response: string;
    /** Synthesized reply audio; absent when no audio was produced. */
    audio?: Buffer;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Constructor arguments for RealtimeAgent.
 */
interface RealtimeAgentOptions {
    /** Behavioural settings (goal, system prompt, TTS switch). */
    config: RealtimeAgentConfig;
    /** LLM adapter handed to the inner streaming agent. */
    llm: LLMAdapter;
    /** Speech-to-text adapter used to transcribe incoming audio. */
    stt: STTAdapter;
    /** Text-to-speech adapter; omit to get text-only responses. */
    tts?: TTSAdapter;
    /** Tools made available to the inner agent; treated as empty when omitted. */
    tools?: Tool[];
}
|
|
90
|
+
/**
 * Voice agent that runs audio through an STT → LLM → TTS pipeline.
 *
 * @example
 * ```typescript
 * const agent = new RealtimeAgent({
 *   config: { goal: 'Answer questions', tts: true },
 *   llm: new OpenAIAdapter({ apiKey }),
 *   stt: new OpenAISTTAdapter({ apiKey }),
 *   tts: new OpenAITTSAdapter({ apiKey }),
 * });
 *
 * const result = await agent.process(audioBuffer);
 * console.log(result.transcript); // what the user said
 * console.log(result.response);   // the LLM's reply
 * ```
 */
declare class RealtimeAgent {
    private config;
    private stt;
    private tts?;
    private innerAgent;
    constructor(options: RealtimeAgentOptions);
    /**
     * One-shot pipeline: transcribe the audio, run the LLM, then synthesize the reply.
     *
     * @param audio Audio bytes to transcribe.
     * @param audioFormat MIME type of the audio ('audio/wav' when omitted).
     * @returns The transcript, the response text and, when produced, the reply audio.
     */
    process(audio: Buffer | ArrayBuffer, audioFormat?: string): Promise<RealtimeProcessResult>;
    /**
     * Streaming pipeline: emits the transcript, response tokens, tool activity,
     * audio chunks and a final `done` event as they become available.
     *
     * @param audio Audio bytes to transcribe.
     * @param audioFormat MIME type of the audio ('audio/wav' when omitted).
     */
    processStream(audio: Buffer | ArrayBuffer, audioFormat?: string): AsyncIterable<RealtimeEvent>;
    /**
     * Builds a WebSocket connection handler: each incoming message is treated
     * as audio, and every resulting event is sent back as a JSON string.
     *
     * @example
     * ```typescript
     * import { WebSocketServer } from 'ws';
     * const wss = new WebSocketServer({ port: 8080 });
     * wss.on('connection', agent.wsHandler());
     * ```
     */
    wsHandler(): (ws: {
        on(event: string, listener: (...args: unknown[]) => void): void;
        send(data: string): void;
    }) => void;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Settings for OpenAISTTAdapter.
 */
interface OpenAISTTConfig {
    /** API key sent as a Bearer token. */
    apiKey: string;
    /** Transcription model name. */
    model?: string;
    /** Base URL of the API (without the `/audio/transcriptions` suffix). */
    baseURL?: string;
    /** Request timeout in milliseconds. */
    timeoutMs?: number;
}
|
|
145
|
+
/**
 * Speech-to-text adapter backed by the OpenAI Whisper API.
 */
declare class OpenAISTTAdapter implements STTAdapter {
    private apiKey;
    private model;
    private baseURL;
    private timeoutMs;
    /**
     * @param config API key plus optional model, base URL and timeout.
     */
    constructor(config: OpenAISTTConfig);
    /**
     * Transcribes an audio buffer to text.
     *
     * @param audio Raw audio bytes.
     * @param format MIME type of the audio.
     * @returns The transcribed text.
     */
    transcribe(audio: Buffer | ArrayBuffer, format?: string): Promise<string>;
}
|
|
156
|
+
|
|
157
|
+
/**
 * Settings for OpenAITTSAdapter.
 */
interface OpenAITTSConfig {
    /** API key sent as a Bearer token. */
    apiKey: string;
    /** Speech model name. */
    model?: string;
    /** Voice used when a synthesize call does not specify one. */
    voice?: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
    /** Base URL of the API (without the `/audio/speech` suffix). */
    baseURL?: string;
    /** Request timeout in milliseconds. */
    timeoutMs?: number;
}
|
|
164
|
+
/**
 * Text-to-speech adapter backed by the OpenAI TTS API.
 */
declare class OpenAITTSAdapter implements TTSAdapter {
    private apiKey;
    private model;
    private defaultVoice;
    private baseURL;
    private timeoutMs;
    /**
     * @param config API key plus optional model, default voice, base URL and timeout.
     */
    constructor(config: OpenAITTSConfig);
    /**
     * Synthesizes the whole text into a single audio buffer.
     *
     * @param text Text to speak.
     * @param options Per-call voice, speed and output format overrides.
     */
    synthesize(text: string, options?: TTSSynthesizeOptions): Promise<Buffer>;
    /**
     * Synthesizes the text as a sequence of audio buffers.
     *
     * @param text Text to speak.
     * @param options Per-call voice, speed and output format overrides.
     */
    synthesizeStream(text: string, options?: TTSSynthesizeOptions): AsyncIterable<Buffer>;
}
|
|
177
|
+
|
|
178
|
+
export { OpenAISTTAdapter, type OpenAISTTConfig, OpenAITTSAdapter, type OpenAITTSConfig, RealtimeAgent, type RealtimeAgentConfig, type RealtimeAgentOptions, type RealtimeEvent, type RealtimeProcessResult, type STTAdapter, type TTSAdapter, type TTSSynthesizeOptions };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
import { LLMAdapter } from '@orka-js/core';
|
|
2
|
+
import { Tool } from '@orka-js/agent';
|
|
3
|
+
|
|
4
|
+
/**
 * Contract for speech-to-text (audio transcription) providers.
 */
interface STTAdapter {
    /**
     * Converts recorded audio into text.
     *
     * @param audio Raw audio bytes.
     * @param format MIME type of the audio, e.g. 'audio/webm', 'audio/wav', 'audio/mp3'.
     * @returns The transcribed text.
     */
    transcribe(audio: Buffer | ArrayBuffer, format?: string): Promise<string>;
}
|
|
15
|
+
/**
 * Contract for text-to-speech (audio synthesis) providers.
 */
interface TTSAdapter {
    /**
     * Turns text into one complete audio buffer.
     *
     * @param text Text to speak.
     * @param options Voice, speed and output format overrides.
     */
    synthesize(text: string, options?: TTSSynthesizeOptions): Promise<Buffer>;
    /**
     * Optional streaming variant: turns text into a sequence of audio chunks.
     *
     * @param text Text to speak.
     * @param options Voice, speed and output format overrides.
     */
    synthesizeStream?(text: string, options?: TTSSynthesizeOptions): AsyncIterable<Buffer>;
}
|
|
28
|
+
/**
 * Per-call overrides accepted by TTS adapters.
 */
interface TTSSynthesizeOptions {
    /** Voice identifier understood by the adapter. */
    voice?: string;
    /** Playback speed multiplier. */
    speed?: number;
    /** Output format: 'mp3' | 'opus' | 'aac' | 'flac' | 'wav' */
    format?: string;
}
|
|
34
|
+
/**
 * Discriminated union (tagged by `type`) of everything RealtimeAgent emits
 * while handling one voice interaction.
 */
type RealtimeEvent =
    /** Speech-to-text finished; `text` is what the user said. */
    | { type: 'transcript'; text: string }
    /** One piece of the LLM's streamed reply. */
    | { type: 'token'; content: string }
    /** The agent invoked a tool. */
    | { type: 'tool_call'; name: string; args: unknown }
    /** A tool invocation returned. */
    | { type: 'tool_result'; name: string; result: unknown }
    /** A chunk of synthesized audio. */
    | { type: 'audio_chunk'; data: Buffer }
    /** Interaction complete, with the final transcript, reply and optional audio. */
    | { type: 'done'; transcript: string; response: string; audio?: Buffer }
    /** Interaction failed. */
    | { type: 'error'; error: Error; message: string };
|
|
64
|
+
/**
 * Behavioural settings for a RealtimeAgent.
 */
interface RealtimeAgentConfig {
    /** Goal or persona the voice agent should follow. */
    goal: string;
    /** Optional system prompt handed to the LLM. */
    systemPrompt?: string;
    /**
     * Language hint for speech-to-text as an ISO-639-1 code, e.g. 'en' or 'fr'.
     * NOTE(review): the bundled RealtimeAgent implementation does not appear to
     * forward this value to the STT adapter — confirm before relying on it.
     */
    language?: string;
    /**
     * Whether audio output should be synthesized. Synthesis is skipped only
     * when this is explicitly `false` (and requires a TTS adapter).
     */
    tts?: boolean;
}
|
|
74
|
+
/**
 * Final outcome of one RealtimeAgent.process() call.
 */
interface RealtimeProcessResult {
    /** Text transcribed from the user's speech. */
    transcript: string;
    /** Text reply produced by the LLM. */
    response: string;
    /** Synthesized speech for the reply (only when TTS is enabled). */
    audio?: Buffer;
}
|
|
82
|
+
|
|
83
|
+
/**
 * Dependencies passed to the RealtimeAgent constructor.
 */
interface RealtimeAgentOptions {
    /** Agent behaviour (goal, prompt, language, TTS switch). */
    config: RealtimeAgentConfig;
    /** LLM adapter that produces the text reply. */
    llm: LLMAdapter;
    /** Speech-to-text adapter used to transcribe incoming audio. */
    stt: STTAdapter;
    /** Optional text-to-speech adapter used to voice the reply. */
    tts?: TTSAdapter;
    /** Optional tools made available to the agent (defaults to none). */
    tools?: Tool[];
}
|
|
90
|
+
/**
 * Voice agent that runs audio through a speech-to-text → LLM → text-to-speech pipeline.
 *
 * @example
 * ```typescript
 * const agent = new RealtimeAgent({
 *   config: { goal: 'Answer questions', tts: true },
 *   llm: new OpenAIAdapter({ apiKey }),
 *   stt: new OpenAISTTAdapter({ apiKey }),
 *   tts: new OpenAITTSAdapter({ apiKey }),
 * });
 *
 * const result = await agent.process(audioBuffer);
 * console.log(result.transcript); // what the user said
 * console.log(result.response);   // the LLM's reply
 * ```
 */
declare class RealtimeAgent {
    private config;
    private stt;
    private tts?;
    private innerAgent;
    /**
     * @param options Configuration plus the LLM/STT/TTS adapters and optional tools.
     */
    constructor(options: RealtimeAgentOptions);
    /**
     * Runs the whole pipeline once: transcribe, query the LLM, then synthesize.
     *
     * @param audio Recorded user speech.
     * @param audioFormat MIME type of `audio`; 'audio/wav' when omitted.
     * @returns The transcript, the text response and (when synthesized) the audio buffer.
     */
    process(audio: Buffer | ArrayBuffer, audioFormat?: string): Promise<RealtimeProcessResult>;
    /**
     * Same pipeline as `process`, exposed as an event stream: transcript,
     * tokens, tool activity, audio chunks and a closing `done` event.
     *
     * @param audio Recorded user speech.
     * @param audioFormat MIME type of `audio`; 'audio/wav' when omitted.
     */
    processStream(audio: Buffer | ArrayBuffer, audioFormat?: string): AsyncIterable<RealtimeEvent>;
    /**
     * Builds a handler for WebSocket connections. Each binary message received
     * on the socket is treated as audio; events are sent back as JSON strings.
     *
     * @example
     * ```typescript
     * import { WebSocketServer } from 'ws';
     * const wss = new WebSocketServer({ port: 8080 });
     * wss.on('connection', agent.wsHandler());
     * ```
     */
    wsHandler(): (ws: {
        on(event: string, listener: (...args: unknown[]) => void): void;
        send(data: string): void;
    }) => void;
}
|
|
138
|
+
|
|
139
|
+
/**
 * Settings for OpenAISTTAdapter.
 */
interface OpenAISTTConfig {
    /** API key sent as a Bearer token. */
    apiKey: string;
    /** Transcription model name; 'whisper-1' when omitted. */
    model?: string;
    /** Base URL of the API; 'https://api.openai.com/v1' when omitted. */
    baseURL?: string;
    /** Request timeout in milliseconds; 120000 when omitted. */
    timeoutMs?: number;
}
|
|
145
|
+
/**
 * Speech-to-text adapter backed by the OpenAI Whisper API.
 */
declare class OpenAISTTAdapter implements STTAdapter {
    private apiKey;
    private model;
    private baseURL;
    private timeoutMs;
    /**
     * @param config API key plus optional model, base URL and timeout.
     */
    constructor(config: OpenAISTTConfig);
    /**
     * Uploads the audio to `/audio/transcriptions` and returns the text reply.
     *
     * @param audio Raw audio bytes.
     * @param format MIME type of the audio; 'audio/wav' when omitted.
     * @returns The transcribed text.
     */
    transcribe(audio: Buffer | ArrayBuffer, format?: string): Promise<string>;
}
|
|
156
|
+
|
|
157
|
+
/**
 * Settings for OpenAITTSAdapter.
 */
interface OpenAITTSConfig {
    /** API key sent as a Bearer token. */
    apiKey: string;
    /** Speech model name; 'tts-1' when omitted. */
    model?: string;
    /** Voice used when a synthesize call does not specify one; 'alloy' when omitted. */
    voice?: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';
    /** Base URL of the API; 'https://api.openai.com/v1' when omitted. */
    baseURL?: string;
    /** Request timeout in milliseconds; 60000 when omitted. */
    timeoutMs?: number;
}
|
|
164
|
+
/**
 * Text-to-speech adapter backed by the OpenAI TTS API.
 */
declare class OpenAITTSAdapter implements TTSAdapter {
    private apiKey;
    private model;
    private defaultVoice;
    private baseURL;
    private timeoutMs;
    /**
     * @param config API key plus optional model, default voice, base URL and timeout.
     */
    constructor(config: OpenAITTSConfig);
    /**
     * Posts the text to `/audio/speech` and returns the audio as one buffer.
     *
     * @param text Text to speak.
     * @param options Per-call voice, speed and output format overrides.
     */
    synthesize(text: string, options?: TTSSynthesizeOptions): Promise<Buffer>;
    /**
     * Splits the text into sentences and yields one synthesized buffer per sentence.
     *
     * @param text Text to speak.
     * @param options Per-call voice, speed and output format overrides.
     */
    synthesizeStream(text: string, options?: TTSSynthesizeOptions): AsyncIterable<Buffer>;
}
|
|
177
|
+
|
|
178
|
+
export { OpenAISTTAdapter, type OpenAISTTConfig, OpenAITTSAdapter, type OpenAITTSConfig, RealtimeAgent, type RealtimeAgentConfig, type RealtimeAgentOptions, type RealtimeEvent, type RealtimeProcessResult, type STTAdapter, type TTSAdapter, type TTSSynthesizeOptions };
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,227 @@
|
|
|
1
|
+
import { StreamingToolAgent } from '@orka-js/agent';
|
|
2
|
+
|
|
3
|
+
// src/realtime-agent.ts
|
|
4
|
+
var RealtimeAgent = class {
|
|
5
|
+
config;
|
|
6
|
+
stt;
|
|
7
|
+
tts;
|
|
8
|
+
innerAgent;
|
|
9
|
+
constructor(options) {
|
|
10
|
+
this.config = options.config;
|
|
11
|
+
this.stt = options.stt;
|
|
12
|
+
this.tts = options.tts;
|
|
13
|
+
this.innerAgent = new StreamingToolAgent(
|
|
14
|
+
{
|
|
15
|
+
goal: options.config.goal,
|
|
16
|
+
systemPrompt: options.config.systemPrompt,
|
|
17
|
+
tools: options.tools ?? []
|
|
18
|
+
},
|
|
19
|
+
options.llm
|
|
20
|
+
);
|
|
21
|
+
}
|
|
22
|
+
/**
|
|
23
|
+
* Process audio: transcribe → run LLM → synthesize.
|
|
24
|
+
* Returns final transcript, response text and audio buffer.
|
|
25
|
+
*/
|
|
26
|
+
async process(audio, audioFormat = "audio/wav") {
|
|
27
|
+
const transcript = await this.stt.transcribe(audio, audioFormat);
|
|
28
|
+
let response = "";
|
|
29
|
+
for await (const event of this.innerAgent.runStream(transcript)) {
|
|
30
|
+
if (event.type === "done") {
|
|
31
|
+
response = event.content;
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
let audioOutput;
|
|
35
|
+
if (this.config.tts !== false && this.tts && response) {
|
|
36
|
+
audioOutput = await this.tts.synthesize(response);
|
|
37
|
+
}
|
|
38
|
+
return { transcript, response, audio: audioOutput };
|
|
39
|
+
}
|
|
40
|
+
/**
|
|
41
|
+
* Process audio as a stream of events — yields transcript, tokens, audio chunks, and done.
|
|
42
|
+
*/
|
|
43
|
+
async *processStream(audio, audioFormat = "audio/wav") {
|
|
44
|
+
let transcript;
|
|
45
|
+
try {
|
|
46
|
+
transcript = await this.stt.transcribe(audio, audioFormat);
|
|
47
|
+
} catch (error) {
|
|
48
|
+
yield { type: "error", error, message: error.message };
|
|
49
|
+
return;
|
|
50
|
+
}
|
|
51
|
+
yield { type: "transcript", text: transcript };
|
|
52
|
+
let fullResponse = "";
|
|
53
|
+
for await (const event of this.innerAgent.runStream(transcript)) {
|
|
54
|
+
switch (event.type) {
|
|
55
|
+
case "token":
|
|
56
|
+
fullResponse += event.token;
|
|
57
|
+
yield { type: "token", content: event.token };
|
|
58
|
+
break;
|
|
59
|
+
case "tool_call":
|
|
60
|
+
yield { type: "tool_call", name: event.name, args: event.arguments };
|
|
61
|
+
break;
|
|
62
|
+
case "tool_result":
|
|
63
|
+
yield { type: "tool_result", name: String(event.toolCallId), result: event.result };
|
|
64
|
+
break;
|
|
65
|
+
case "done":
|
|
66
|
+
fullResponse = event.content || fullResponse;
|
|
67
|
+
break;
|
|
68
|
+
case "error":
|
|
69
|
+
yield { type: "error", error: event.error, message: event.error.message };
|
|
70
|
+
return;
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
let audioOutput;
|
|
74
|
+
if (this.config.tts !== false && this.tts && fullResponse) {
|
|
75
|
+
if (this.tts.synthesizeStream) {
|
|
76
|
+
for await (const chunk of this.tts.synthesizeStream(fullResponse)) {
|
|
77
|
+
yield { type: "audio_chunk", data: chunk };
|
|
78
|
+
if (!audioOutput) audioOutput = chunk;
|
|
79
|
+
else audioOutput = Buffer.concat([audioOutput, chunk]);
|
|
80
|
+
}
|
|
81
|
+
} else {
|
|
82
|
+
audioOutput = await this.tts.synthesize(fullResponse);
|
|
83
|
+
yield { type: "audio_chunk", data: audioOutput };
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
yield { type: "done", transcript, response: fullResponse, audio: audioOutput };
|
|
87
|
+
}
|
|
88
|
+
/**
|
|
89
|
+
* Returns an Express/Node.js-compatible WebSocket message handler.
|
|
90
|
+
* Expects binary audio messages from the client, sends back JSON events.
|
|
91
|
+
*
|
|
92
|
+
* @example
|
|
93
|
+
* ```typescript
|
|
94
|
+
* import { WebSocketServer } from 'ws';
|
|
95
|
+
* const wss = new WebSocketServer({ port: 8080 });
|
|
96
|
+
* wss.on('connection', agent.wsHandler());
|
|
97
|
+
* ```
|
|
98
|
+
*/
|
|
99
|
+
wsHandler() {
|
|
100
|
+
return (ws) => {
|
|
101
|
+
ws.on("message", async (data) => {
|
|
102
|
+
const audio = data instanceof Buffer ? data : data instanceof ArrayBuffer ? Buffer.from(data) : Buffer.from(data);
|
|
103
|
+
try {
|
|
104
|
+
for await (const event of this.processStream(audio)) {
|
|
105
|
+
ws.send(JSON.stringify(event));
|
|
106
|
+
}
|
|
107
|
+
} catch (error) {
|
|
108
|
+
ws.send(JSON.stringify({
|
|
109
|
+
type: "error",
|
|
110
|
+
error: { message: error.message },
|
|
111
|
+
message: error.message
|
|
112
|
+
}));
|
|
113
|
+
}
|
|
114
|
+
});
|
|
115
|
+
};
|
|
116
|
+
}
|
|
117
|
+
};
|
|
118
|
+
|
|
119
|
+
// src/adapters/openai-stt.ts
|
|
120
|
+
var OpenAISTTAdapter = class {
|
|
121
|
+
apiKey;
|
|
122
|
+
model;
|
|
123
|
+
baseURL;
|
|
124
|
+
timeoutMs;
|
|
125
|
+
constructor(config) {
|
|
126
|
+
this.apiKey = config.apiKey;
|
|
127
|
+
this.model = config.model ?? "whisper-1";
|
|
128
|
+
this.baseURL = config.baseURL ?? "https://api.openai.com/v1";
|
|
129
|
+
this.timeoutMs = config.timeoutMs ?? 12e4;
|
|
130
|
+
}
|
|
131
|
+
async transcribe(audio, format = "audio/wav") {
|
|
132
|
+
const controller = new AbortController();
|
|
133
|
+
const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
|
|
134
|
+
const ext = format.includes("webm") ? "webm" : format.includes("mp3") ? "mp3" : format.includes("ogg") ? "ogg" : "wav";
|
|
135
|
+
const audioData = audio instanceof Buffer ? audio.buffer.slice(audio.byteOffset, audio.byteOffset + audio.byteLength) : audio;
|
|
136
|
+
const blob = new Blob([audioData], { type: format });
|
|
137
|
+
const formData = new FormData();
|
|
138
|
+
formData.append("file", blob, `audio.${ext}`);
|
|
139
|
+
formData.append("model", this.model);
|
|
140
|
+
formData.append("response_format", "text");
|
|
141
|
+
let response;
|
|
142
|
+
try {
|
|
143
|
+
response = await fetch(`${this.baseURL}/audio/transcriptions`, {
|
|
144
|
+
method: "POST",
|
|
145
|
+
signal: controller.signal,
|
|
146
|
+
headers: { "Authorization": `Bearer ${this.apiKey}` },
|
|
147
|
+
body: formData
|
|
148
|
+
});
|
|
149
|
+
} catch (error) {
|
|
150
|
+
clearTimeout(timeout);
|
|
151
|
+
if (error.name === "AbortError") {
|
|
152
|
+
throw new Error(`Whisper API request timed out after ${this.timeoutMs}ms`);
|
|
153
|
+
}
|
|
154
|
+
throw error;
|
|
155
|
+
} finally {
|
|
156
|
+
clearTimeout(timeout);
|
|
157
|
+
}
|
|
158
|
+
if (!response.ok) {
|
|
159
|
+
const err = await response.text();
|
|
160
|
+
throw new Error(`Whisper API error: ${response.status} - ${err}`);
|
|
161
|
+
}
|
|
162
|
+
return response.text();
|
|
163
|
+
}
|
|
164
|
+
};
|
|
165
|
+
|
|
166
|
+
// src/adapters/openai-tts.ts
|
|
167
|
+
var OpenAITTSAdapter = class {
|
|
168
|
+
apiKey;
|
|
169
|
+
model;
|
|
170
|
+
defaultVoice;
|
|
171
|
+
baseURL;
|
|
172
|
+
timeoutMs;
|
|
173
|
+
constructor(config) {
|
|
174
|
+
this.apiKey = config.apiKey;
|
|
175
|
+
this.model = config.model ?? "tts-1";
|
|
176
|
+
this.defaultVoice = config.voice ?? "alloy";
|
|
177
|
+
this.baseURL = config.baseURL ?? "https://api.openai.com/v1";
|
|
178
|
+
this.timeoutMs = config.timeoutMs ?? 6e4;
|
|
179
|
+
}
|
|
180
|
+
async synthesize(text, options = {}) {
|
|
181
|
+
const controller = new AbortController();
|
|
182
|
+
const timeout = setTimeout(() => controller.abort(), this.timeoutMs);
|
|
183
|
+
let response;
|
|
184
|
+
try {
|
|
185
|
+
response = await fetch(`${this.baseURL}/audio/speech`, {
|
|
186
|
+
method: "POST",
|
|
187
|
+
signal: controller.signal,
|
|
188
|
+
headers: {
|
|
189
|
+
"Content-Type": "application/json",
|
|
190
|
+
"Authorization": `Bearer ${this.apiKey}`
|
|
191
|
+
},
|
|
192
|
+
body: JSON.stringify({
|
|
193
|
+
model: this.model,
|
|
194
|
+
input: text,
|
|
195
|
+
voice: options.voice ?? this.defaultVoice,
|
|
196
|
+
response_format: options.format ?? "mp3",
|
|
197
|
+
speed: options.speed ?? 1
|
|
198
|
+
})
|
|
199
|
+
});
|
|
200
|
+
} catch (error) {
|
|
201
|
+
clearTimeout(timeout);
|
|
202
|
+
if (error.name === "AbortError") {
|
|
203
|
+
throw new Error(`TTS API request timed out after ${this.timeoutMs}ms`);
|
|
204
|
+
}
|
|
205
|
+
throw error;
|
|
206
|
+
} finally {
|
|
207
|
+
clearTimeout(timeout);
|
|
208
|
+
}
|
|
209
|
+
if (!response.ok) {
|
|
210
|
+
const err = await response.text();
|
|
211
|
+
throw new Error(`TTS API error: ${response.status} - ${err}`);
|
|
212
|
+
}
|
|
213
|
+
const arrayBuffer = await response.arrayBuffer();
|
|
214
|
+
return Buffer.from(arrayBuffer);
|
|
215
|
+
}
|
|
216
|
+
async *synthesizeStream(text, options = {}) {
|
|
217
|
+
const sentences = text.match(/[^.!?]+[.!?]+/g) ?? [text];
|
|
218
|
+
for (const sentence of sentences) {
|
|
219
|
+
const audio = await this.synthesize(sentence.trim(), options);
|
|
220
|
+
yield audio;
|
|
221
|
+
}
|
|
222
|
+
}
|
|
223
|
+
};
|
|
224
|
+
|
|
225
|
+
export { OpenAISTTAdapter, OpenAITTSAdapter, RealtimeAgent };
|
|
226
|
+
//# sourceMappingURL=index.js.map
|
|
227
|
+
//# sourceMappingURL=index.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/realtime-agent.ts","../src/adapters/openai-stt.ts","../src/adapters/openai-tts.ts"],"names":[],"mappings":";;;AAoCO,IAAM,gBAAN,MAAoB;AAAA,EACjB,MAAA;AAAA,EACA,GAAA;AAAA,EACA,GAAA;AAAA,EACA,UAAA;AAAA,EAER,YAAY,OAAA,EAA+B;AACzC,IAAA,IAAA,CAAK,SAAS,OAAA,CAAQ,MAAA;AACtB,IAAA,IAAA,CAAK,MAAM,OAAA,CAAQ,GAAA;AACnB,IAAA,IAAA,CAAK,MAAM,OAAA,CAAQ,GAAA;AAEnB,IAAA,IAAA,CAAK,aAAa,IAAI,kBAAA;AAAA,MACpB;AAAA,QACE,IAAA,EAAM,QAAQ,MAAA,CAAO,IAAA;AAAA,QACrB,YAAA,EAAc,QAAQ,MAAA,CAAO,YAAA;AAAA,QAC7B,KAAA,EAAO,OAAA,CAAQ,KAAA,IAAS;AAAC,OAC3B;AAAA,MACA,OAAA,CAAQ;AAAA,KACV;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAM,OAAA,CAAQ,KAAA,EAA6B,WAAA,GAAc,WAAA,EAA6C;AACpG,IAAA,MAAM,aAAa,MAAM,IAAA,CAAK,GAAA,CAAI,UAAA,CAAW,OAAO,WAAW,CAAA;AAE/D,IAAA,IAAI,QAAA,GAAW,EAAA;AACf,IAAA,WAAA,MAAiB,KAAA,IAAS,IAAA,CAAK,UAAA,CAAW,SAAA,CAAU,UAAU,CAAA,EAAG;AAC/D,MAAA,IAAI,KAAA,CAAM,SAAS,MAAA,EAAQ;AACzB,QAAA,QAAA,GAAW,KAAA,CAAM,OAAA;AAAA,MACnB;AAAA,IACF;AAEA,IAAA,IAAI,WAAA;AACJ,IAAA,IAAI,KAAK,MAAA,CAAO,GAAA,KAAQ,KAAA,IAAS,IAAA,CAAK,OAAO,QAAA,EAAU;AACrD,MAAA,WAAA,GAAc,MAAM,IAAA,CAAK,GAAA,CAAI,UAAA,CAAW,QAAQ,CAAA;AAAA,IAClD;AAEA,IAAA,OAAO,EAAE,UAAA,EAAY,QAAA,EAAU,KAAA,EAAO,WAAA,EAAY;AAAA,EACpD;AAAA;AAAA;AAAA;AAAA,EAKA,OAAO,aAAA,CACL,KAAA,EACA,WAAA,GAAc,WAAA,EACgB;AAC9B,IAAA,IAAI,UAAA;AACJ,IAAA,IAAI;AACF,MAAA,UAAA,GAAa,MAAM,IAAA,CAAK,GAAA,CAAI,UAAA,CAAW,OAAO,WAAW,CAAA;AAAA,IAC3D,SAAS,KAAA,EAAO;AACd,MAAA,MAAM,EAAE,IAAA,EAAM,OAAA,EAAS,KAAA,EAAuB,OAAA,EAAU,MAAgB,OAAA,EAAQ;AAChF,MAAA;AAAA,IACF;AAEA,IAAA,MAAM,EAAE,IAAA,EAAM,YAAA,EAAc,IAAA,EAAM,UAAA,EAAW;AAE7C,IAAA,IAAI,YAAA,GAAe,EAAA;AAEnB,IAAA,WAAA,MAAiB,KAAA,IAAS,IAAA,CAAK,UAAA,CAAW,SAAA,CAAU,UAAU,CAAA,EAAG;AAC/D,MAAA,QAAQ,MAAM,IAAA;AAAM,QAClB,KAAK,OAAA;AACH,UAAA,YAAA,IAAgB,KAAA,CAAM,KAAA;AACtB,UAAA,MAAM,EAAE,IAAA,EAAM,OAAA,EAAS,OAAA,EAAS,MAAM,KAAA,EAAM;AAC5C,UAAA;AAAA,QACF,KAAK,WAAA;AACH,UAAA,MAAM,EAAE,MAAM,WAAA,EAAa,IAAA,EAAM,MAAM,IAAA,EAAM,IAAA,EAAM,MAAM,SAAA,EAAU;AACnE,UAAA;AAAA,QACF,KAAK,aAAA;AACH,UAAA,MAAM,EAAE,IAAA,EAAM,aAAA,EAAe,IAA
A,EAAM,MAAA,CAAO,MAAM,UAAU,CAAA,EAAG,MAAA,EAAQ,KAAA,CAAM,MAAA,EAAO;AAClF,UAAA;AAAA,QACF,KAAK,MAAA;AACH,UAAA,YAAA,GAAe,MAAM,OAAA,IAAW,YAAA;AAChC,UAAA;AAAA,QACF,KAAK,OAAA;AACH,UAAA,MAAM,EAAE,MAAM,OAAA,EAAS,KAAA,EAAO,MAAM,KAAA,EAAO,OAAA,EAAS,KAAA,CAAM,KAAA,CAAM,OAAA,EAAQ;AACxE,UAAA;AAAA;AACJ,IACF;AAGA,IAAA,IAAI,WAAA;AACJ,IAAA,IAAI,KAAK,MAAA,CAAO,GAAA,KAAQ,KAAA,IAAS,IAAA,CAAK,OAAO,YAAA,EAAc;AACzD,MAAA,IAAI,IAAA,CAAK,IAAI,gBAAA,EAAkB;AAC7B,QAAA,WAAA,MAAiB,KAAA,IAAS,IAAA,CAAK,GAAA,CAAI,gBAAA,CAAiB,YAAY,CAAA,EAAG;AACjE,UAAA,MAAM,EAAE,IAAA,EAAM,aAAA,EAAe,IAAA,EAAM,KAAA,EAAM;AACzC,UAAA,IAAI,CAAC,aAAa,WAAA,GAAc,KAAA;AAAA,6BACb,MAAA,CAAO,MAAA,CAAO,CAAC,WAAA,EAAa,KAAK,CAAC,CAAA;AAAA,QACvD;AAAA,MACF,CAAA,MAAO;AACL,QAAA,WAAA,GAAc,MAAM,IAAA,CAAK,GAAA,CAAI,UAAA,CAAW,YAAY,CAAA;AACpD,QAAA,MAAM,EAAE,IAAA,EAAM,aAAA,EAAe,IAAA,EAAM,WAAA,EAAY;AAAA,MACjD;AAAA,IACF;AAEA,IAAA,MAAM,EAAE,IAAA,EAAM,MAAA,EAAQ,YAAY,QAAA,EAAU,YAAA,EAAc,OAAO,WAAA,EAAY;AAAA,EAC/E;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAaA,SAAA,GAAY;AACV,IAAA,OAAO,CAAC,EAAA,KAGF;AACJ,MAAA,EAAA,CAAG,EAAA,CAAG,SAAA,EAAW,OAAO,IAAA,KAAkB;AACxC,QAAA,MAAM,KAAA,GAAQ,IAAA,YAAgB,MAAA,GAAS,IAAA,GACnC,IAAA,YAAgB,WAAA,GAAc,MAAA,CAAO,IAAA,CAAK,IAAI,CAAA,GAC9C,MAAA,CAAO,IAAA,CAAK,IAAkB,CAAA;AAElC,QAAA,IAAI;AACF,UAAA,WAAA,MAAiB,KAAA,IAAS,IAAA,CAAK,aAAA,CAAc,KAAK,CAAA,EAAG;AACnD,YAAA,EAAA,CAAG,IAAA,CAAK,IAAA,CAAK,SAAA,CAAU,KAAK,CAAC,CAAA;AAAA,UAC/B;AAAA,QACF,SAAS,KAAA,EAAO;AACd,UAAA,EAAA,CAAG,IAAA,CAAK,KAAK,SAAA,CAAU;AAAA,YACrB,IAAA,EAAM,OAAA;AAAA,YACN,KAAA,EAAO,EAAE,OAAA,EAAU,KAAA,CAAgB,OAAA,EAAQ;AAAA,YAC3C,SAAU,KAAA,CAAgB;AAAA,WAC3B,CAAC,CAAA;AAAA,QACJ;AAAA,MACF,CAAC,CAAA;AAAA,IACH,CAAA;AAAA,EACF;AACF;;;AChKO,IAAM,mBAAN,MAA6C;AAAA,EAC1C,MAAA;AAAA,EACA,KAAA;AAAA,EACA,OAAA;AAAA,EACA,SAAA;AAAA,EAER,YAAY,MAAA,EAAyB;AACnC,IAAA,IAAA,CAAK,SAAS,MAAA,CAAO,MAAA;AACrB,IAAA,IAAA,CAAK,KAAA,GAAQ,OAAO,KAAA,IAAS,WAAA;AAC7B,IAAA,IAAA,CAAK,OAAA,GAAU,OAAO,OAAA,IAAW,2BAAA;AACjC,IAAA,IAAA,CAAK,SAAA,GAAY,OAAO,SAAA,IAAa,IAAA;AAAA,EACvC;AAAA,EAEA,MA
AM,UAAA,CAAW,KAAA,EAA6B,MAAA,GAAS,WAAA,EAA8B;AACnF,IAAA,MAAM,UAAA,GAAa,IAAI,eAAA,EAAgB;AACvC,IAAA,MAAM,UAAU,UAAA,CAAW,MAAM,WAAW,KAAA,EAAM,EAAG,KAAK,SAAS,CAAA;AAEnE,IAAA,MAAM,GAAA,GAAM,MAAA,CAAO,QAAA,CAAS,MAAM,IAAI,MAAA,GAClC,MAAA,CAAO,QAAA,CAAS,KAAK,IAAI,KAAA,GACzB,MAAA,CAAO,QAAA,CAAS,KAAK,IAAI,KAAA,GACzB,KAAA;AAEJ,IAAA,MAAM,SAAA,GAAY,KAAA,YAAiB,MAAA,GAAS,KAAA,CAAM,MAAA,CAAO,KAAA,CAAM,KAAA,CAAM,UAAA,EAAY,KAAA,CAAM,UAAA,GAAa,KAAA,CAAM,UAAU,CAAA,GAAI,KAAA;AACxH,IAAA,MAAM,IAAA,GAAO,IAAI,IAAA,CAAK,CAAC,SAAqB,CAAA,EAAG,EAAE,IAAA,EAAM,MAAA,EAAQ,CAAA;AAC/D,IAAA,MAAM,QAAA,GAAW,IAAI,QAAA,EAAS;AAC9B,IAAA,QAAA,CAAS,MAAA,CAAO,MAAA,EAAQ,IAAA,EAAM,CAAA,MAAA,EAAS,GAAG,CAAA,CAAE,CAAA;AAC5C,IAAA,QAAA,CAAS,MAAA,CAAO,OAAA,EAAS,IAAA,CAAK,KAAK,CAAA;AACnC,IAAA,QAAA,CAAS,MAAA,CAAO,mBAAmB,MAAM,CAAA;AAEzC,IAAA,IAAI,QAAA;AACJ,IAAA,IAAI;AACF,MAAA,QAAA,GAAW,MAAM,KAAA,CAAM,CAAA,EAAG,IAAA,CAAK,OAAO,CAAA,qBAAA,CAAA,EAAyB;AAAA,QAC7D,MAAA,EAAQ,MAAA;AAAA,QACR,QAAQ,UAAA,CAAW,MAAA;AAAA,QACnB,SAAS,EAAE,eAAA,EAAiB,CAAA,OAAA,EAAU,IAAA,CAAK,MAAM,CAAA,CAAA,EAAG;AAAA,QACpD,IAAA,EAAM;AAAA,OACP,CAAA;AAAA,IACH,SAAS,KAAA,EAAO;AACd,MAAA,YAAA,CAAa,OAAO,CAAA;AACpB,MAAA,IAAK,KAAA,CAAgB,SAAS,YAAA,EAAc;AAC1C,QAAA,MAAM,IAAI,KAAA,CAAM,CAAA,oCAAA,EAAuC,IAAA,CAAK,SAAS,CAAA,EAAA,CAAI,CAAA;AAAA,MAC3E;AACA,MAAA,MAAM,KAAA;AAAA,IACR,CAAA,SAAE;AACA,MAAA,YAAA,CAAa,OAAO,CAAA;AAAA,IACtB;AAEA,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,MAAM,GAAA,GAAM,MAAM,QAAA,CAAS,IAAA,EAAK;AAChC,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,mBAAA,EAAsB,SAAS,MAAM,CAAA,GAAA,EAAM,GAAG,CAAA,CAAE,CAAA;AAAA,IAClE;AAEA,IAAA,OAAO,SAAS,IAAA,EAAK;AAAA,EACvB;AACF;;;ACrDO,IAAM,mBAAN,MAA6C;AAAA,EAC1C,MAAA;AAAA,EACA,KAAA;AAAA,EACA,YAAA;AAAA,EACA,OAAA;AAAA,EACA,SAAA;AAAA,EAER,YAAY,MAAA,EAAyB;AACnC,IAAA,IAAA,CAAK,SAAS,MAAA,CAAO,MAAA;AACrB,IAAA,IAAA,CAAK,KAAA,GAAQ,OAAO,KAAA,IAAS,OAAA;AAC7B,IAAA,IAAA,CAAK,YAAA,GAAe,OAAO,KAAA,IAAS,OAAA;AACpC,IAAA,IAAA,CAAK,OAAA,GAAU,OAAO,OAAA,IAAW,2BAAA;AACjC,IAAA,IAAA,CAAK,SAAA,GAAY,OAAO,SAAA,IAAa,GAAA;AAAA,EACvC;AAAA,EAEA,MAAM,UAAA,CAAW,IAAA,EAAc,
OAAA,GAAgC,EAAC,EAAoB;AAClF,IAAA,MAAM,UAAA,GAAa,IAAI,eAAA,EAAgB;AACvC,IAAA,MAAM,UAAU,UAAA,CAAW,MAAM,WAAW,KAAA,EAAM,EAAG,KAAK,SAAS,CAAA;AAEnE,IAAA,IAAI,QAAA;AACJ,IAAA,IAAI;AACF,MAAA,QAAA,GAAW,MAAM,KAAA,CAAM,CAAA,EAAG,IAAA,CAAK,OAAO,CAAA,aAAA,CAAA,EAAiB;AAAA,QACrD,MAAA,EAAQ,MAAA;AAAA,QACR,QAAQ,UAAA,CAAW,MAAA;AAAA,QACnB,OAAA,EAAS;AAAA,UACP,cAAA,EAAgB,kBAAA;AAAA,UAChB,eAAA,EAAiB,CAAA,OAAA,EAAU,IAAA,CAAK,MAAM,CAAA;AAAA,SACxC;AAAA,QACA,IAAA,EAAM,KAAK,SAAA,CAAU;AAAA,UACnB,OAAO,IAAA,CAAK,KAAA;AAAA,UACZ,KAAA,EAAO,IAAA;AAAA,UACP,KAAA,EAAO,OAAA,CAAQ,KAAA,IAAS,IAAA,CAAK,YAAA;AAAA,UAC7B,eAAA,EAAiB,QAAQ,MAAA,IAAU,KAAA;AAAA,UACnC,KAAA,EAAO,QAAQ,KAAA,IAAS;AAAA,SACzB;AAAA,OACF,CAAA;AAAA,IACH,SAAS,KAAA,EAAO;AACd,MAAA,YAAA,CAAa,OAAO,CAAA;AACpB,MAAA,IAAK,KAAA,CAAgB,SAAS,YAAA,EAAc;AAC1C,QAAA,MAAM,IAAI,KAAA,CAAM,CAAA,gCAAA,EAAmC,IAAA,CAAK,SAAS,CAAA,EAAA,CAAI,CAAA;AAAA,MACvE;AACA,MAAA,MAAM,KAAA;AAAA,IACR,CAAA,SAAE;AACA,MAAA,YAAA,CAAa,OAAO,CAAA;AAAA,IACtB;AAEA,IAAA,IAAI,CAAC,SAAS,EAAA,EAAI;AAChB,MAAA,MAAM,GAAA,GAAM,MAAM,QAAA,CAAS,IAAA,EAAK;AAChC,MAAA,MAAM,IAAI,KAAA,CAAM,CAAA,eAAA,EAAkB,SAAS,MAAM,CAAA,GAAA,EAAM,GAAG,CAAA,CAAE,CAAA;AAAA,IAC9D;AAEA,IAAA,MAAM,WAAA,GAAc,MAAM,QAAA,CAAS,WAAA,EAAY;AAC/C,IAAA,OAAO,MAAA,CAAO,KAAK,WAAW,CAAA;AAAA,EAChC;AAAA,EAEA,OAAO,gBAAA,CAAiB,IAAA,EAAc,OAAA,GAAgC,EAAC,EAA0B;AAE/F,IAAA,MAAM,YAAY,IAAA,CAAK,KAAA,CAAM,gBAAgB,CAAA,IAAK,CAAC,IAAI,CAAA;AACvD,IAAA,KAAA,MAAW,YAAY,SAAA,EAAW;AAChC,MAAA,MAAM,QAAQ,MAAM,IAAA,CAAK,WAAW,QAAA,CAAS,IAAA,IAAQ,OAAO,CAAA;AAC5D,MAAA,MAAM,KAAA;AAAA,IACR;AAAA,EACF;AACF","file":"index.js","sourcesContent":["import type { LLMAdapter } from '@orka-js/core';\nimport { StreamingToolAgent } from '@orka-js/agent';\nimport type { Tool } from '@orka-js/agent';\nimport type {\n STTAdapter,\n TTSAdapter,\n RealtimeAgentConfig,\n RealtimeEvent,\n RealtimeProcessResult,\n} from './types.js';\n\nexport interface RealtimeAgentOptions {\n config: RealtimeAgentConfig;\n llm: LLMAdapter;\n stt: STTAdapter;\n tts?: TTSAdapter;\n tools?: Tool[];\n}\n\n/**\n 
* A voice agent that processes audio through the STT → LLM → TTS pipeline.\n *\n * @example\n * ```typescript\n * const agent = new RealtimeAgent({\n * config: { goal: 'Answer questions', tts: true },\n * llm: new OpenAIAdapter({ apiKey }),\n * stt: new OpenAISTTAdapter({ apiKey }),\n * tts: new OpenAITTSAdapter({ apiKey }),\n * });\n *\n * const result = await agent.process(audioBuffer);\n * console.log(result.transcript); // what the user said\n * console.log(result.response); // the LLM's reply\n * ```\n */\nexport class RealtimeAgent {\n private config: RealtimeAgentConfig;\n private stt: STTAdapter;\n private tts?: TTSAdapter;\n private innerAgent: StreamingToolAgent;\n\n constructor(options: RealtimeAgentOptions) {\n this.config = options.config;\n this.stt = options.stt;\n this.tts = options.tts;\n\n this.innerAgent = new StreamingToolAgent(\n {\n goal: options.config.goal,\n systemPrompt: options.config.systemPrompt,\n tools: options.tools ?? [],\n },\n options.llm,\n );\n }\n\n /**\n * Process audio: transcribe → run LLM → synthesize.\n * Returns final transcript, response text and audio buffer.\n */\n async process(audio: Buffer | ArrayBuffer, audioFormat = 'audio/wav'): Promise<RealtimeProcessResult> {\n const transcript = await this.stt.transcribe(audio, audioFormat);\n\n let response = '';\n for await (const event of this.innerAgent.runStream(transcript)) {\n if (event.type === 'done') {\n response = event.content;\n }\n }\n\n let audioOutput: Buffer | undefined;\n if (this.config.tts !== false && this.tts && response) {\n audioOutput = await this.tts.synthesize(response);\n }\n\n return { transcript, response, audio: audioOutput };\n }\n\n /**\n * Process audio as a stream of events — yields transcript, tokens, audio chunks, and done.\n */\n async *processStream(\n audio: Buffer | ArrayBuffer,\n audioFormat = 'audio/wav',\n ): AsyncIterable<RealtimeEvent> {\n let transcript: string;\n try {\n transcript = await this.stt.transcribe(audio, 
audioFormat);\n } catch (error) {\n yield { type: 'error', error: error as Error, message: (error as Error).message };\n return;\n }\n\n yield { type: 'transcript', text: transcript };\n\n let fullResponse = '';\n\n for await (const event of this.innerAgent.runStream(transcript)) {\n switch (event.type) {\n case 'token':\n fullResponse += event.token;\n yield { type: 'token', content: event.token };\n break;\n case 'tool_call':\n yield { type: 'tool_call', name: event.name, args: event.arguments };\n break;\n case 'tool_result':\n yield { type: 'tool_result', name: String(event.toolCallId), result: event.result };\n break;\n case 'done':\n fullResponse = event.content || fullResponse;\n break;\n case 'error':\n yield { type: 'error', error: event.error, message: event.error.message };\n return;\n }\n }\n\n // Synthesize audio\n let audioOutput: Buffer | undefined;\n if (this.config.tts !== false && this.tts && fullResponse) {\n if (this.tts.synthesizeStream) {\n for await (const chunk of this.tts.synthesizeStream(fullResponse)) {\n yield { type: 'audio_chunk', data: chunk };\n if (!audioOutput) audioOutput = chunk;\n else audioOutput = Buffer.concat([audioOutput, chunk]);\n }\n } else {\n audioOutput = await this.tts.synthesize(fullResponse);\n yield { type: 'audio_chunk', data: audioOutput };\n }\n }\n\n yield { type: 'done', transcript, response: fullResponse, audio: audioOutput };\n }\n\n /**\n * Returns an Express/Node.js-compatible WebSocket message handler.\n * Expects binary audio messages from the client, sends back JSON events.\n *\n * @example\n * ```typescript\n * import { WebSocketServer } from 'ws';\n * const wss = new WebSocketServer({ port: 8080 });\n * wss.on('connection', agent.wsHandler());\n * ```\n */\n wsHandler() {\n return (ws: {\n on(event: string, listener: (...args: unknown[]) => void): void;\n send(data: string): void;\n }) => {\n ws.on('message', async (data: unknown) => {\n const audio = data instanceof Buffer ? 
data\n : data instanceof ArrayBuffer ? Buffer.from(data)\n : Buffer.from(data as Uint8Array);\n\n try {\n for await (const event of this.processStream(audio)) {\n ws.send(JSON.stringify(event));\n }\n } catch (error) {\n ws.send(JSON.stringify({\n type: 'error',\n error: { message: (error as Error).message },\n message: (error as Error).message,\n }));\n }\n });\n };\n }\n}\n","import type { STTAdapter } from '../types.js';\n\nexport interface OpenAISTTConfig {\n apiKey: string;\n model?: string;\n baseURL?: string;\n timeoutMs?: number;\n}\n\n/**\n * STT adapter using OpenAI Whisper API.\n */\nexport class OpenAISTTAdapter implements STTAdapter {\n private apiKey: string;\n private model: string;\n private baseURL: string;\n private timeoutMs: number;\n\n constructor(config: OpenAISTTConfig) {\n this.apiKey = config.apiKey;\n this.model = config.model ?? 'whisper-1';\n this.baseURL = config.baseURL ?? 'https://api.openai.com/v1';\n this.timeoutMs = config.timeoutMs ?? 120_000;\n }\n\n async transcribe(audio: Buffer | ArrayBuffer, format = 'audio/wav'): Promise<string> {\n const controller = new AbortController();\n const timeout = setTimeout(() => controller.abort(), this.timeoutMs);\n\n const ext = format.includes('webm') ? 'webm'\n : format.includes('mp3') ? 'mp3'\n : format.includes('ogg') ? 'ogg'\n : 'wav';\n\n const audioData = audio instanceof Buffer ? 
audio.buffer.slice(audio.byteOffset, audio.byteOffset + audio.byteLength) : audio;\n const blob = new Blob([audioData as BlobPart], { type: format });\n const formData = new FormData();\n formData.append('file', blob, `audio.${ext}`);\n formData.append('model', this.model);\n formData.append('response_format', 'text');\n\n let response: Response;\n try {\n response = await fetch(`${this.baseURL}/audio/transcriptions`, {\n method: 'POST',\n signal: controller.signal,\n headers: { 'Authorization': `Bearer ${this.apiKey}` },\n body: formData,\n });\n } catch (error) {\n clearTimeout(timeout);\n if ((error as Error).name === 'AbortError') {\n throw new Error(`Whisper API request timed out after ${this.timeoutMs}ms`);\n }\n throw error;\n } finally {\n clearTimeout(timeout);\n }\n\n if (!response.ok) {\n const err = await response.text();\n throw new Error(`Whisper API error: ${response.status} - ${err}`);\n }\n\n return response.text();\n }\n}\n","import type { TTSAdapter, TTSSynthesizeOptions } from '../types.js';\n\nexport interface OpenAITTSConfig {\n apiKey: string;\n model?: string;\n voice?: 'alloy' | 'echo' | 'fable' | 'onyx' | 'nova' | 'shimmer';\n baseURL?: string;\n timeoutMs?: number;\n}\n\n/**\n * TTS adapter using OpenAI TTS API.\n */\nexport class OpenAITTSAdapter implements TTSAdapter {\n private apiKey: string;\n private model: string;\n private defaultVoice: string;\n private baseURL: string;\n private timeoutMs: number;\n\n constructor(config: OpenAITTSConfig) {\n this.apiKey = config.apiKey;\n this.model = config.model ?? 'tts-1';\n this.defaultVoice = config.voice ?? 'alloy';\n this.baseURL = config.baseURL ?? 'https://api.openai.com/v1';\n this.timeoutMs = config.timeoutMs ?? 
60_000;\n }\n\n async synthesize(text: string, options: TTSSynthesizeOptions = {}): Promise<Buffer> {\n const controller = new AbortController();\n const timeout = setTimeout(() => controller.abort(), this.timeoutMs);\n\n let response: Response;\n try {\n response = await fetch(`${this.baseURL}/audio/speech`, {\n method: 'POST',\n signal: controller.signal,\n headers: {\n 'Content-Type': 'application/json',\n 'Authorization': `Bearer ${this.apiKey}`,\n },\n body: JSON.stringify({\n model: this.model,\n input: text,\n voice: options.voice ?? this.defaultVoice,\n response_format: options.format ?? 'mp3',\n speed: options.speed ?? 1.0,\n }),\n });\n } catch (error) {\n clearTimeout(timeout);\n if ((error as Error).name === 'AbortError') {\n throw new Error(`TTS API request timed out after ${this.timeoutMs}ms`);\n }\n throw error;\n } finally {\n clearTimeout(timeout);\n }\n\n if (!response.ok) {\n const err = await response.text();\n throw new Error(`TTS API error: ${response.status} - ${err}`);\n }\n\n const arrayBuffer = await response.arrayBuffer();\n return Buffer.from(arrayBuffer);\n }\n\n async *synthesizeStream(text: string, options: TTSSynthesizeOptions = {}): AsyncIterable<Buffer> {\n // Split text into sentences for lower latency\n const sentences = text.match(/[^.!?]+[.!?]+/g) ?? [text];\n for (const sentence of sentences) {\n const audio = await this.synthesize(sentence.trim(), options);\n yield audio;\n }\n }\n}\n"]}
|
package/package.json
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@orka-js/realtime",
|
|
3
|
+
"version": "1.5.0",
|
|
4
|
+
"description": "Voice agent for OrkaJS — STT → LLM → TTS pipeline with WebSocket support",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "./dist/index.cjs",
|
|
7
|
+
"module": "./dist/index.js",
|
|
8
|
+
"types": "./dist/index.d.ts",
|
|
9
|
+
"exports": {
|
|
10
|
+
".": {
|
|
11
|
+
"import": "./dist/index.js",
|
|
12
|
+
"require": "./dist/index.cjs",
|
|
13
|
+
"types": "./dist/index.d.ts"
|
|
14
|
+
}
|
|
15
|
+
},
|
|
16
|
+
"files": [
|
|
17
|
+
"dist"
|
|
18
|
+
],
|
|
19
|
+
"dependencies": {
|
|
20
|
+
"@orka-js/core": "1.5.0",
|
|
21
|
+
"@orka-js/agent": "1.5.2"
|
|
22
|
+
},
|
|
23
|
+
"peerDependencies": {
|
|
24
|
+
"ws": ">=8.0.0"
|
|
25
|
+
},
|
|
26
|
+
"peerDependenciesMeta": {
|
|
27
|
+
"ws": {
|
|
28
|
+
"optional": true
|
|
29
|
+
}
|
|
30
|
+
},
|
|
31
|
+
"devDependencies": {
|
|
32
|
+
"@types/ws": "^8.5.10",
|
|
33
|
+
"vitest": "^1.6.0"
|
|
34
|
+
},
|
|
35
|
+
"scripts": {
|
|
36
|
+
"build": "tsup",
|
|
37
|
+
"dev": "tsup --watch",
|
|
38
|
+
"typecheck": "tsc --noEmit",
|
|
39
|
+
"test": "vitest run"
|
|
40
|
+
}
|
|
41
|
+
}
|