open-agents-ai 0.187.277 → 0.187.278
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +92 -16
- package/package.json +1 -1
package/dist/index.js
CHANGED
|
@@ -294676,6 +294676,8 @@ var init_voice = __esm({
|
|
|
294676
294676
|
currentPlayback = null;
|
|
294677
294677
|
speakQueue = [];
|
|
294678
294678
|
speaking = false;
|
|
294679
|
+
drainPromise = null;
|
|
294680
|
+
drainResolve = null;
|
|
294679
294681
|
phonemizeFn = null;
|
|
294680
294682
|
/** True when current model uses MLX Audio backend */
|
|
294681
294683
|
mlxActive = false;
|
|
@@ -294992,6 +294994,15 @@ var init_voice = __esm({
|
|
|
294992
294994
|
const speedFactor = emotion ? emotionToSpeedFactor(emotion, this.starkMode, this.autistMode) : 1;
|
|
294993
294995
|
this.enqueueSpeech(text, 0.55, 1 + pitchBias, speedFactor, 0.15);
|
|
294994
294996
|
}
|
|
294997
|
+
/** Wait until the speak queue is fully drained (all audio played). */
|
|
294998
|
+
async waitUntilIdle() {
|
|
294999
|
+
if (!this.speaking && this.speakQueue.length === 0) return;
|
|
295000
|
+
if (this.drainPromise) {
|
|
295001
|
+
await this.drainPromise;
|
|
295002
|
+
} else {
|
|
295003
|
+
await this.sleep(100);
|
|
295004
|
+
}
|
|
295005
|
+
}
|
|
294995
295006
|
enqueueSpeech(text, volume, pitchFactor, speedFactor = 1, stereoDelayMs = 0.6) {
|
|
294996
295007
|
if (!this.enabled || !this.ready) return;
|
|
294997
295008
|
text = sanitizeForTTS(text);
|
|
@@ -295190,6 +295201,11 @@ var init_voice = __esm({
|
|
|
295190
295201
|
*/
|
|
295191
295202
|
async drainQueue() {
|
|
295192
295203
|
this.speaking = true;
|
|
295204
|
+
if (!this.drainPromise) {
|
|
295205
|
+
this.drainPromise = new Promise((resolve40) => {
|
|
295206
|
+
this.drainResolve = resolve40;
|
|
295207
|
+
});
|
|
295208
|
+
}
|
|
295193
295209
|
let isFirst = true;
|
|
295194
295210
|
let prefetchedWav = null;
|
|
295195
295211
|
while (this.speakQueue.length > 0) {
|
|
@@ -295241,6 +295257,14 @@ var init_voice = __esm({
|
|
|
295241
295257
|
}
|
|
295242
295258
|
}
|
|
295243
295259
|
this.speaking = false;
|
|
295260
|
+
if (this.drainResolve) {
|
|
295261
|
+
try {
|
|
295262
|
+
this.drainResolve();
|
|
295263
|
+
} catch {
|
|
295264
|
+
}
|
|
295265
|
+
}
|
|
295266
|
+
this.drainResolve = null;
|
|
295267
|
+
this.drainPromise = null;
|
|
295244
295268
|
}
|
|
295245
295269
|
sleep(ms) {
|
|
295246
295270
|
return new Promise((resolve40) => setTimeout(resolve40, ms));
|
|
@@ -328218,13 +328242,12 @@ __export(voicechat_exports, {
|
|
|
328218
328242
|
VoiceChatSession: () => VoiceChatSession
|
|
328219
328243
|
});
|
|
328220
328244
|
import { EventEmitter as EventEmitter10 } from "node:events";
|
|
328221
|
-
var VAD_SILENCE_MS, MAX_SEGMENT_MS,
|
|
328245
|
+
var VAD_SILENCE_MS, MAX_SEGMENT_MS, MAX_CONTEXT_TURNS, SYSTEM_PROMPT2, VoiceChatSession;
|
|
328222
328246
|
var init_voicechat = __esm({
|
|
328223
328247
|
"packages/cli/src/tui/voicechat.ts"() {
|
|
328224
328248
|
"use strict";
|
|
328225
|
-
VAD_SILENCE_MS =
|
|
328249
|
+
VAD_SILENCE_MS = 2e3;
|
|
328226
328250
|
MAX_SEGMENT_MS = 6500;
|
|
328227
|
-
SUMMARY_INJECTION_INTERVAL = 4;
|
|
328228
328251
|
MAX_CONTEXT_TURNS = 20;
|
|
328229
328252
|
SYSTEM_PROMPT2 = `You are a voice assistant having a live spoken conversation. Keep responses extremely brief — 1-2 sentences max. You're speaking aloud, not writing. Be conversational, direct, and helpful. Don't use markdown, bullet points, or formatting — just natural speech. If you don't know something, say so briefly. Do not over-think — respond quickly and concisely.`;
|
|
328230
328253
|
VoiceChatSession = class extends EventEmitter10 {
|
|
@@ -328256,6 +328279,7 @@ var init_voicechat = __esm({
|
|
|
328256
328279
|
// Bound handlers for cleanup
|
|
328257
328280
|
_onTranscript = null;
|
|
328258
328281
|
_onError = null;
|
|
328282
|
+
_retryMicTimer = null;
|
|
328259
328283
|
constructor(opts) {
|
|
328260
328284
|
super();
|
|
328261
328285
|
this.voice = opts.voice;
|
|
@@ -328321,6 +328345,19 @@ var init_voicechat = __esm({
|
|
|
328321
328345
|
// ASR/mic error handler: reports the error, then schedules one delayed
// attempt to restart the listener while the session is still active.
this._onError = (err) => {
  const msg = err instanceof Error ? err.message : String(err);
  this.onStatus(`ASR error (voicechat continues without mic): ${msg.slice(0, 80)}`);
  // At most one retry is pending at a time; errors arriving before it
  // fires are only reported.
  if (this.active && !this._retryMicTimer) {
    this._retryMicTimer = setTimeout(async () => {
      this._retryMicTimer = null;
      // The session may have been stopped during the delay.
      if (!this.active) return;
      try {
        // A failing stop() must not prevent the restart attempt.
        await this.listen.stop().catch(() => {
        });
        // Re-check after the await: if the session ended while the
        // listener was stopping, do not reopen the microphone.
        if (!this.active) return;
        await this.listen.start();
        this.onStatus("Mic auto-recovered — LISTENING");
      } catch (retryErr) {
        // Surface the failure instead of swallowing it, so the user knows
        // the mic is still down.
        const reason = retryErr instanceof Error ? retryErr.message : String(retryErr);
        this.onStatus(`Mic auto-recovery failed: ${reason.slice(0, 80)}`);
      }
    }, 1e3);
  }
};
|
|
328325
328362
|
this.listen.on("transcript", this._onTranscript);
|
|
328326
328363
|
this.listen.on("error", this._onError);
|
|
@@ -328422,6 +328459,12 @@ var init_voicechat = __esm({
|
|
|
328422
328459
|
this.onUserSpeech(text);
|
|
328423
328460
|
this.context.push({ role: "user", content: text });
|
|
328424
328461
|
this.turnCount++;
|
|
328462
|
+
if (this.runner) {
|
|
328463
|
+
try {
|
|
328464
|
+
this.runner.injectUserMessage(`[VOICECHAT] ${text}`);
|
|
328465
|
+
} catch {
|
|
328466
|
+
}
|
|
328467
|
+
}
|
|
328425
328468
|
while (this.context.length > MAX_CONTEXT_TURNS + 1) {
|
|
328426
328469
|
this.context.splice(1, 1);
|
|
328427
328470
|
}
|
|
@@ -328443,11 +328486,18 @@ var init_voicechat = __esm({
|
|
|
328443
328486
|
this.setState("SPEAKING");
|
|
328444
328487
|
this.onAgentSpeech(response.trim());
|
|
328445
328488
|
this.voice.speak(response.trim());
|
|
328446
|
-
if (this.runner
|
|
328489
|
+
if (this.runner) {
|
|
328447
328490
|
this.injectSummary();
|
|
328448
328491
|
}
|
|
328449
|
-
|
|
328450
|
-
|
|
328492
|
+
if (typeof this.voice.waitUntilIdle === "function") {
|
|
328493
|
+
try {
|
|
328494
|
+
await this.voice.waitUntilIdle();
|
|
328495
|
+
} catch {
|
|
328496
|
+
}
|
|
328497
|
+
} else {
|
|
328498
|
+
const estimatedMs = Math.max(1500, response.length / 5 * (6e4 / 150));
|
|
328499
|
+
await new Promise((r2) => setTimeout(r2, estimatedMs));
|
|
328500
|
+
}
|
|
328451
328501
|
}
|
|
328452
328502
|
} catch (err) {
|
|
328453
328503
|
if (!this.active) return;
|
|
@@ -328570,13 +328620,19 @@ var init_voicechat = __esm({
|
|
|
328570
328620
|
// ---------------------------------------------------------------------------
|
|
328571
328621
|
injectSummary() {
|
|
328572
328622
|
if (!this.runner) return;
|
|
328573
|
-
const recentTurns = this.context.filter((t2) => t2.role !== "system").slice(-
|
|
328623
|
+
const recentTurns = this.context.filter((t2) => t2.role !== "system").slice(-8).map((t2) => `${t2.role === "user" ? "User" : "Assistant"}: ${t2.content}`).join("\n");
|
|
328574
328624
|
this.runner.injectUserMessage(
|
|
328575
|
-
`[VOICECHAT SUMMARY]
|
|
328625
|
+
`[VOICECHAT SUMMARY] Parallel voice liaison update (for awareness only). Continue your current task; do not respond to this directly.
|
|
328576
328626
|
|
|
328577
328627
|
${recentTurns}`
|
|
328578
328628
|
);
|
|
328579
328629
|
}
|
|
328630
|
+
/** Enqueue narration from main agent events into the voice channel */
|
|
328631
|
+
enqueueAgentNarration(text, subordinate = true) {
|
|
328632
|
+
if (!text || !this.active) return;
|
|
328633
|
+
if (subordinate) this.voice.speakSubordinate(text);
|
|
328634
|
+
else this.voice.speak(text);
|
|
328635
|
+
}
|
|
328580
328636
|
};
|
|
328581
328637
|
}
|
|
328582
328638
|
});
|
|
@@ -330156,6 +330212,17 @@ ${entry.fullContent}`
|
|
|
330156
330212
|
}
|
|
330157
330213
|
});
|
|
330158
330214
|
}
|
|
330215
|
+
if (voice?.enabled && voice.voiceMode === "voicechat" && _voiceChatSession?.isActive && event.toolName === "task_complete") {
|
|
330216
|
+
const emoStateFinal = emotionEngine?.getState();
|
|
330217
|
+
const emoCtxFinal = emoStateFinal ? { valence: emoStateFinal.valence, arousal: emoStateFinal.arousal, label: emoStateFinal.label, emoji: emoStateFinal.emoji } : void 0;
|
|
330218
|
+
const desc = describeTaskComplete(String(event.content ?? ""), true, vLevel);
|
|
330219
|
+
if (desc) {
|
|
330220
|
+
try {
|
|
330221
|
+
_voiceChatSession.enqueueAgentNarration(desc, false);
|
|
330222
|
+
} catch {
|
|
330223
|
+
}
|
|
330224
|
+
}
|
|
330225
|
+
}
|
|
330159
330226
|
break;
|
|
330160
330227
|
}
|
|
330161
330228
|
case "model_response":
|
|
@@ -330237,6 +330304,15 @@ ${entry.fullContent}`
|
|
|
330237
330304
|
voice.speak(chatText);
|
|
330238
330305
|
voice._spokenStreamText = true;
|
|
330239
330306
|
}
|
|
330307
|
+
} else if (voice?.enabled && voice.voiceMode === "voicechat" && (streamTextBuffer || event.content)) {
|
|
330308
|
+
const chatText = (streamTextBuffer || event.content || "").trim();
|
|
330309
|
+
streamTextBuffer = "";
|
|
330310
|
+
if (chatText.length > 10 && _voiceChatSession?.isActive) {
|
|
330311
|
+
try {
|
|
330312
|
+
_voiceChatSession.enqueueAgentNarration(chatText, false);
|
|
330313
|
+
} catch {
|
|
330314
|
+
}
|
|
330315
|
+
}
|
|
330240
330316
|
}
|
|
330241
330317
|
break;
|
|
330242
330318
|
}
|
|
@@ -331274,7 +331350,7 @@ ${opts.systemPromptAddition}` : `Working directory: ${repoRoot}`;
|
|
|
331274
331350
|
autoUpdateTimer.unref();
|
|
331275
331351
|
const voiceEngine = new VoiceEngine();
|
|
331276
331352
|
let voiceSession = null;
|
|
331277
|
-
let
|
|
331353
|
+
let _voiceChatSession2 = null;
|
|
331278
331354
|
let tunnelGateway = null;
|
|
331279
331355
|
let p2pGateway = null;
|
|
331280
331356
|
let peerMesh = null;
|
|
@@ -332963,7 +333039,7 @@ Respond concisely and safely. Remember: you are talking to the general public.`;
|
|
|
332963
333039
|
},
|
|
332964
333040
|
// --- /voicechat: Voryn-style state machine voice conversation ---
|
|
332965
333041
|
async voiceChatStart() {
|
|
332966
|
-
if (
|
|
333042
|
+
if (_voiceChatSession2?.isActive) return;
|
|
332967
333043
|
if (!voiceEngine.enabled || !voiceEngine.ready) {
|
|
332968
333044
|
writeContent(() => renderInfo2("Auto-enabling voice for voice chat..."));
|
|
332969
333045
|
const voiceMsg = await voiceEngine.toggle();
|
|
@@ -332980,7 +333056,7 @@ Respond concisely and safely. Remember: you are talking to the general public.`;
|
|
|
332980
333056
|
}
|
|
332981
333057
|
}
|
|
332982
333058
|
};
|
|
332983
|
-
|
|
333059
|
+
_voiceChatSession2 = new VoiceChatSession2({
|
|
332984
333060
|
voice: voiceEngine,
|
|
332985
333061
|
listen: listenEng,
|
|
332986
333062
|
backendUrl: currentConfig.backendUrl,
|
|
@@ -333005,16 +333081,16 @@ Respond concisely and safely. Remember: you are talking to the general public.`;
|
|
|
333005
333081
|
writeContent(() => renderInfo2(`\x1B[38;5;243m[voicechat] ${state}\x1B[0m`));
|
|
333006
333082
|
}
|
|
333007
333083
|
});
|
|
333008
|
-
await
|
|
333084
|
+
await _voiceChatSession2.start();
|
|
333009
333085
|
},
|
|
333010
333086
|
async voiceChatStop() {
|
|
333011
|
-
if (
|
|
333012
|
-
await
|
|
333013
|
-
|
|
333087
|
+
if (_voiceChatSession2?.isActive) {
|
|
333088
|
+
await _voiceChatSession2.stop();
|
|
333089
|
+
_voiceChatSession2 = null;
|
|
333014
333090
|
}
|
|
333015
333091
|
},
|
|
333016
333092
|
isVoiceChatActive() {
|
|
333017
|
-
return
|
|
333093
|
+
return _voiceChatSession2?.isActive ?? false;
|
|
333018
333094
|
},
|
|
333019
333095
|
async exposeStart(kindOrUrl, authKey, transport, fullAccess, passthrough, loadbalance) {
|
|
333020
333096
|
const knownKinds = ["ollama", "vllm", "llvm", "passthrough"];
|
package/package.json
CHANGED