open-agents-ai 0.187.277 → 0.187.278

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/dist/index.js +92 -16
  2. package/package.json +1 -1
package/dist/index.js CHANGED
@@ -294676,6 +294676,8 @@ var init_voice = __esm({
  currentPlayback = null;
  speakQueue = [];
  speaking = false;
+ drainPromise = null;
+ drainResolve = null;
  phonemizeFn = null;
  /** True when current model uses MLX Audio backend */
  mlxActive = false;
@@ -294992,6 +294994,15 @@ var init_voice = __esm({
  const speedFactor = emotion ? emotionToSpeedFactor(emotion, this.starkMode, this.autistMode) : 1;
  this.enqueueSpeech(text, 0.55, 1 + pitchBias, speedFactor, 0.15);
  }
+ /** Wait until the speak queue is fully drained (all audio played). */
+ async waitUntilIdle() {
+ if (!this.speaking && this.speakQueue.length === 0) return;
+ if (this.drainPromise) {
+ await this.drainPromise;
+ } else {
+ await this.sleep(100);
+ }
+ }
  enqueueSpeech(text, volume, pitchFactor, speedFactor = 1, stereoDelayMs = 0.6) {
  if (!this.enabled || !this.ready) return;
  text = sanitizeForTTS(text);
@@ -295190,6 +295201,11 @@ var init_voice = __esm({
  */
  async drainQueue() {
  this.speaking = true;
+ if (!this.drainPromise) {
+ this.drainPromise = new Promise((resolve40) => {
+ this.drainResolve = resolve40;
+ });
+ }
  let isFirst = true;
  let prefetchedWav = null;
  while (this.speakQueue.length > 0) {
@@ -295241,6 +295257,14 @@ var init_voice = __esm({
  }
  }
  this.speaking = false;
+ if (this.drainResolve) {
+ try {
+ this.drainResolve();
+ } catch {
+ }
+ }
+ this.drainResolve = null;
+ this.drainPromise = null;
  }
  sleep(ms) {
  return new Promise((resolve40) => setTimeout(resolve40, ms));
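
The two hunks above add a drain promise to the speech queue: drainQueue() lazily creates the promise when playback starts and resolves it once the queue empties, while waitUntilIdle() awaits that promise (or a short sleep as a last resort). A minimal TypeScript sketch of the same pattern, with a hypothetical SpeechQueue class and a placeholder play() standing in for real TTS playback:

    // Minimal sketch of the drain-promise idle-wait pattern (class and play() are hypothetical).
    class SpeechQueue {
      private queue: string[] = [];
      private speaking = false;
      private drainPromise: Promise<void> | null = null;
      private drainResolve: (() => void) | null = null;

      enqueue(text: string): void {
        this.queue.push(text);
        if (!this.speaking) void this.drain();
      }

      /** Resolves once every queued utterance has finished playing. */
      async waitUntilIdle(): Promise<void> {
        if (!this.speaking && this.queue.length === 0) return;
        if (this.drainPromise) await this.drainPromise;
      }

      private async drain(): Promise<void> {
        this.speaking = true;
        this.drainPromise ??= new Promise((resolve) => (this.drainResolve = resolve));
        while (this.queue.length > 0) {
          const text = this.queue.shift()!;
          await this.play(text); // stand-in for actual audio playback
        }
        this.speaking = false;
        this.drainResolve?.();
        this.drainResolve = null;
        this.drainPromise = null;
      }

      private play(text: string): Promise<void> {
        return new Promise((r) => setTimeout(r, text.length * 10));
      }
    }

Callers can then await queue.waitUntilIdle() instead of guessing how long the audio will take, which is exactly what the voicechat session hunk further down switches to.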
@@ -328218,13 +328242,12 @@ __export(voicechat_exports, {
  VoiceChatSession: () => VoiceChatSession
  });
  import { EventEmitter as EventEmitter10 } from "node:events";
- var VAD_SILENCE_MS, MAX_SEGMENT_MS, SUMMARY_INJECTION_INTERVAL, MAX_CONTEXT_TURNS, SYSTEM_PROMPT2, VoiceChatSession;
+ var VAD_SILENCE_MS, MAX_SEGMENT_MS, MAX_CONTEXT_TURNS, SYSTEM_PROMPT2, VoiceChatSession;
  var init_voicechat = __esm({
  "packages/cli/src/tui/voicechat.ts"() {
  "use strict";
- VAD_SILENCE_MS = 1100;
+ VAD_SILENCE_MS = 2e3;
  MAX_SEGMENT_MS = 6500;
- SUMMARY_INJECTION_INTERVAL = 4;
  MAX_CONTEXT_TURNS = 20;
  SYSTEM_PROMPT2 = `You are a voice assistant having a live spoken conversation. Keep responses extremely brief — 1-2 sentences max. You're speaking aloud, not writing. Be conversational, direct, and helpful. Don't use markdown, bullet points, or formatting — just natural speech. If you don't know something, say so briefly. Do not over-think — respond quickly and concisely.`;
  VoiceChatSession = class extends EventEmitter10 {
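
This hunk raises the end-of-utterance silence threshold from 1100 ms to 2000 ms (written 2e3) and removes SUMMARY_INJECTION_INTERVAL, since summaries are now injected every turn (see the later injectSummary hunk). A small sketch of how two such thresholds typically gate segment finalization; the shouldFinalizeSegment helper and its inputs are hypothetical, not part of the package:

    // Hypothetical helper: finalize a listening segment on sustained silence or a hard time cap.
    const VAD_SILENCE_MS = 2000;  // was 1100 ms in 0.187.277
    const MAX_SEGMENT_MS = 6500;

    function shouldFinalizeSegment(msSinceLastVoice: number, segmentAgeMs: number): boolean {
      return msSinceLastVoice >= VAD_SILENCE_MS || segmentAgeMs >= MAX_SEGMENT_MS;
    }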
@@ -328256,6 +328279,7 @@ var init_voicechat = __esm({
  // Bound handlers for cleanup
  _onTranscript = null;
  _onError = null;
+ _retryMicTimer = null;
  constructor(opts) {
  super();
  this.voice = opts.voice;
@@ -328321,6 +328345,19 @@ var init_voicechat = __esm({
  this._onError = (err) => {
  const msg = err instanceof Error ? err.message : String(err);
  this.onStatus(`ASR error (voicechat continues without mic): ${msg.slice(0, 80)}`);
+ if (this.active && !this._retryMicTimer) {
+ this._retryMicTimer = setTimeout(async () => {
+ this._retryMicTimer = null;
+ if (!this.active) return;
+ try {
+ await this.listen.stop().catch(() => {
+ });
+ await this.listen.start();
+ this.onStatus("Mic auto-recovered — LISTENING");
+ } catch {
+ }
+ }, 1e3);
+ }
  };
  this.listen.on("transcript", this._onTranscript);
  this.listen.on("error", this._onError);
@@ -328422,6 +328459,12 @@ var init_voicechat = __esm({
  this.onUserSpeech(text);
  this.context.push({ role: "user", content: text });
  this.turnCount++;
+ if (this.runner) {
+ try {
+ this.runner.injectUserMessage(`[VOICECHAT] ${text}`);
+ } catch {
+ }
+ }
  while (this.context.length > MAX_CONTEXT_TURNS + 1) {
  this.context.splice(1, 1);
  }
@@ -328443,11 +328486,18 @@ var init_voicechat = __esm({
  this.setState("SPEAKING");
  this.onAgentSpeech(response.trim());
  this.voice.speak(response.trim());
- if (this.runner && this.turnCount % SUMMARY_INJECTION_INTERVAL === 0) {
+ if (this.runner) {
  this.injectSummary();
  }
- const estimatedMs = Math.max(1500, response.length / 5 * (6e4 / 150));
- await new Promise((r2) => setTimeout(r2, estimatedMs));
+ if (typeof this.voice.waitUntilIdle === "function") {
+ try {
+ await this.voice.waitUntilIdle();
+ } catch {
+ }
+ } else {
+ const estimatedMs = Math.max(1500, response.length / 5 * (6e4 / 150));
+ await new Promise((r2) => setTimeout(r2, estimatedMs));
+ }
  }
  } catch (err) {
  if (!this.active) return;
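
The speaking phase previously slept for an estimated duration: response.length / 5 approximates the word count, and 6e4 / 150 is 400 ms per word at 150 words per minute, floored at 1.5 s. It now prefers the new waitUntilIdle() and keeps the estimate only as a fallback for voice engines that lack the method. A restatement of that fallback formula for clarity:

    // Fallback estimate kept for engines without waitUntilIdle():
    // ~5 characters per word, 150 words per minute => 400 ms per word, minimum 1500 ms.
    function estimateSpeechMs(text: string): number {
      return Math.max(1500, (text.length / 5) * (60_000 / 150));
    }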
@@ -328570,13 +328620,19 @@ var init_voicechat = __esm({
  // ---------------------------------------------------------------------------
  injectSummary() {
  if (!this.runner) return;
- const recentTurns = this.context.filter((t2) => t2.role !== "system").slice(-6).map((t2) => `${t2.role === "user" ? "User" : "Assistant"}: ${t2.content}`).join("\n");
+ const recentTurns = this.context.filter((t2) => t2.role !== "system").slice(-8).map((t2) => `${t2.role === "user" ? "User" : "Assistant"}: ${t2.content}`).join("\n");
  this.runner.injectUserMessage(
- `[VOICECHAT SUMMARY] The following is a summary of the recent voice conversation happening in parallel. You don't need to respond to this directly — it's for your awareness. Continue your current task.
+ `[VOICECHAT SUMMARY] Parallel voice liaison update (for awareness only). Continue your current task; do not respond to this directly.

  ${recentTurns}`
  );
  }
+ /** Enqueue narration from main agent events into the voice channel */
+ enqueueAgentNarration(text, subordinate = true) {
+ if (!text || !this.active) return;
+ if (subordinate) this.voice.speakSubordinate(text);
+ else this.voice.speak(text);
+ }
  };
  }
  });
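
With the interval gate gone, injectSummary() now runs after every assistant reply and forwards the last eight non-system turns; the new enqueueAgentNarration() method lets main-agent events speak through the same voice channel. A sketch of the summary payload construction, assuming the { role, content } turn shape used in the diff:

    // Sketch of the summary string handed to runner.injectUserMessage() (turn shape assumed from the diff).
    type Turn = { role: "system" | "user" | "assistant"; content: string };

    function buildVoicechatSummary(context: Turn[]): string {
      const recentTurns = context
        .filter((t) => t.role !== "system")
        .slice(-8)
        .map((t) => `${t.role === "user" ? "User" : "Assistant"}: ${t.content}`)
        .join("\n");
      return "[VOICECHAT SUMMARY] Parallel voice liaison update (for awareness only). " +
        "Continue your current task; do not respond to this directly.\n\n" + recentTurns;
    }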
@@ -330156,6 +330212,17 @@ ${entry.fullContent}`
  }
  });
  }
+ if (voice?.enabled && voice.voiceMode === "voicechat" && _voiceChatSession?.isActive && event.toolName === "task_complete") {
+ const emoStateFinal = emotionEngine?.getState();
+ const emoCtxFinal = emoStateFinal ? { valence: emoStateFinal.valence, arousal: emoStateFinal.arousal, label: emoStateFinal.label, emoji: emoStateFinal.emoji } : void 0;
+ const desc = describeTaskComplete(String(event.content ?? ""), true, vLevel);
+ if (desc) {
+ try {
+ _voiceChatSession.enqueueAgentNarration(desc, false);
+ } catch {
+ }
+ }
+ }
  break;
  }
  case "model_response":
@@ -330237,6 +330304,15 @@ ${entry.fullContent}`
  voice.speak(chatText);
  voice._spokenStreamText = true;
  }
+ } else if (voice?.enabled && voice.voiceMode === "voicechat" && (streamTextBuffer || event.content)) {
+ const chatText = (streamTextBuffer || event.content || "").trim();
+ streamTextBuffer = "";
+ if (chatText.length > 10 && _voiceChatSession?.isActive) {
+ try {
+ _voiceChatSession.enqueueAgentNarration(chatText, false);
+ } catch {
+ }
+ }
  }
  break;
  }
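
In voicechat mode, buffered model text longer than ten characters (and task_complete descriptions, in the hunk before this one) is now routed to the active session as narration rather than spoken directly by the TTS engine. A sketch of that routing check, with the session interface narrowed to the two members the diff relies on:

    // Sketch: hand buffered model output to an active voicechat session (interface narrowed to the diff's usage).
    interface VoiceChatLike {
      isActive: boolean;
      enqueueAgentNarration(text: string, subordinate?: boolean): void;
    }

    function routeModelText(buffer: string, session: VoiceChatLike | null): void {
      const chatText = buffer.trim();
      if (chatText.length > 10 && session?.isActive) {
        try {
          session.enqueueAgentNarration(chatText, false);  // false = speak in the primary voice
        } catch {
          // narration is best-effort; never let it break the event loop
        }
      }
    }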
@@ -331274,7 +331350,7 @@ ${opts.systemPromptAddition}` : `Working directory: ${repoRoot}`;
  autoUpdateTimer.unref();
  const voiceEngine = new VoiceEngine();
  let voiceSession = null;
- let _voiceChatSession = null;
+ let _voiceChatSession2 = null;
  let tunnelGateway = null;
  let p2pGateway = null;
  let peerMesh = null;
@@ -332963,7 +333039,7 @@ Respond concisely and safely. Remember: you are talking to the general public.`;
  },
  // --- /voicechat: Voryn-style state machine voice conversation ---
  async voiceChatStart() {
- if (_voiceChatSession?.isActive) return;
+ if (_voiceChatSession2?.isActive) return;
  if (!voiceEngine.enabled || !voiceEngine.ready) {
  writeContent(() => renderInfo2("Auto-enabling voice for voice chat..."));
  const voiceMsg = await voiceEngine.toggle();
@@ -332980,7 +333056,7 @@ Respond concisely and safely. Remember: you are talking to the general public.`;
  }
  }
  };
- _voiceChatSession = new VoiceChatSession2({
+ _voiceChatSession2 = new VoiceChatSession2({
  voice: voiceEngine,
  listen: listenEng,
  backendUrl: currentConfig.backendUrl,
@@ -333005,16 +333081,16 @@ Respond concisely and safely. Remember: you are talking to the general public.`;
  writeContent(() => renderInfo2(`\x1B[38;5;243m[voicechat] ${state}\x1B[0m`));
  }
  });
- await _voiceChatSession.start();
+ await _voiceChatSession2.start();
  },
  async voiceChatStop() {
- if (_voiceChatSession?.isActive) {
- await _voiceChatSession.stop();
- _voiceChatSession = null;
+ if (_voiceChatSession2?.isActive) {
+ await _voiceChatSession2.stop();
+ _voiceChatSession2 = null;
  }
  },
  isVoiceChatActive() {
- return _voiceChatSession?.isActive ?? false;
+ return _voiceChatSession2?.isActive ?? false;
  },
  async exposeStart(kindOrUrl, authKey, transport, fullAccess, passthrough, loadbalance) {
  const knownKinds = ["ollama", "vllm", "llvm", "passthrough"];
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "open-agents-ai",
- "version": "0.187.277",
+ "version": "0.187.278",
  "description": "AI coding agent powered by open-source models (Ollama/vLLM) — interactive TUI with agentic tool-calling loop",
  "type": "module",
  "main": "./dist/index.js",