npm - @everworker/oneringai - Versions diffs - 0.4.6 → 0.4.7 - Mend

@everworker/oneringai 0.4.6 → 0.4.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (16) hide show

package/README.md +17 -1
package/dist/capabilities/agents/index.d.cts +1 -1
package/dist/capabilities/agents/index.d.ts +1 -1
package/dist/{index-oBtp-8Qn.d.ts → index-Blci0FEd.d.ts} +47 -3
package/dist/{index-DJ-qAK15.d.cts → index-D8RCwpK9.d.cts} +47 -3
package/dist/index.cjs +789 -0
package/dist/index.cjs.map +1 -1
package/dist/index.d.cts +355 -4
package/dist/index.d.ts +355 -4
package/dist/index.js +784 -1
package/dist/index.js.map +1 -1
package/dist/shared/index.cjs +3 -0
package/dist/shared/index.cjs.map +1 -1
package/dist/shared/index.js +3 -0
package/dist/shared/index.js.map +1 -1
package/package.json +3 -3

package/dist/index.cjs CHANGED Viewed

@@ -18,6 +18,7 @@ var spawn = require('cross-spawn');
 var process2 = require('process');
 var stream = require('stream');
 var fs17 = require('fs/promises');
+var events = require('events');
 var simpleIcons = require('simple-icons');
 var child_process = require('child_process');
 var util = require('util');
@@ -12774,6 +12775,9 @@ var MODEL_REGISTRY = {
       video: false,
       batchAPI: true,
       promptCaching: true,
+      parameters: {
+        temperature: false
+      },
       input: {
         tokens: 128e3,
         text: true,
@@ -20931,6 +20935,9 @@ var StreamEventType = /* @__PURE__ */ ((StreamEventType2) => {
   StreamEventType2["REASONING_DONE"] = "response.reasoning.done";
   StreamEventType2["RESPONSE_COMPLETE"] = "response.complete";
   StreamEventType2["ERROR"] = "response.error";
+  StreamEventType2["AUDIO_CHUNK_READY"] = "response.audio_chunk.ready";
+  StreamEventType2["AUDIO_CHUNK_ERROR"] = "response.audio_chunk.error";
+  StreamEventType2["AUDIO_STREAM_COMPLETE"] = "response.audio_stream.complete";
   return StreamEventType2;
 })(StreamEventType || {});
 function isStreamEvent(event, type) {
@@ -20960,6 +20967,15 @@ function isResponseComplete(event) {
 function isErrorEvent(event) {
   return event.type === "response.error" /* ERROR */;
 }
+/**
+ * Returns true when `event.type` is the audio-chunk-ready stream event type.
+ */
+function isAudioChunkReady(event) {
+  const { type } = event;
+  return type === "response.audio_chunk.ready" /* AUDIO_CHUNK_READY */;
+}
+/**
+ * Returns true when `event.type` is the audio-chunk-error stream event type.
+ */
+function isAudioChunkError(event) {
+  const { type } = event;
+  return type === "response.audio_chunk.error" /* AUDIO_CHUNK_ERROR */;
+}
+/**
+ * Returns true when `event.type` is the audio-stream-complete stream event type.
+ */
+function isAudioStreamComplete(event) {
+  const { type } = event;
+  return type === "response.audio_stream.complete" /* AUDIO_STREAM_COMPLETE */;
+}
 // src/infrastructure/providers/openai/OpenAIResponsesStreamConverter.ts
 var OpenAIResponsesStreamConverter = class {
@@ -34267,6 +34283,56 @@ var OpenAITTSProvider = class extends BaseMediaProvider {
       { model: options.model, voice: options.voice }
     );
   }
+  /**
+   * Report whether audio in `format` can be streamed.
+   * A missing/falsy format is treated as streamable; otherwise the format
+   * must be one of the names listed below.
+   */
+  supportsStreaming(format) {
+    const streamableFormats = ["pcm", "wav", "mp3", "opus", "aac", "flac"];
+    return !format || streamableFormats.includes(format);
+  }
+  /**
+   * Stream TTS audio chunks as they arrive from the API.
+   *
+   * Yields `{ audio: Buffer, isFinal: false }` for every chunk read from the
+   * response body, followed by one zero-length `{ isFinal: true }` marker.
+   *
+   * @param options - synthesis options; `model`, `input`, `voice`, `format`,
+   *   `speed` and `vendorOptions.instructions` are read here.
+   * @throws when the response has no body, or when the request / body
+   *   iteration fails (the error is passed to `handleError` first).
+   */
+  async *synthesizeStream(options) {
+    const format = this.mapFormat(options.format);
+    const requestParams = {
+      model: options.model,
+      input: options.input,
+      voice: options.voice,
+      response_format: format,
+      speed: options.speed
+    };
+    if (options.vendorOptions?.instructions) {
+      requestParams.instructions = options.vendorOptions.instructions;
+    }
+    this.logOperationStart("tts.synthesizeStream", {
+      model: options.model,
+      voice: options.voice,
+      inputLength: options.input.length,
+      format
+    });
+    try {
+      const response = await this.client.audio.speech.create(requestParams);
+      const body = response.body;
+      if (!body) {
+        throw new Error("No response body from OpenAI TTS API");
+      }
+      let totalBytes = 0;
+      for await (const chunk of body) {
+        const buf = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk);
+        totalBytes += buf.length;
+        yield { audio: buf, isFinal: false };
+      }
+      // Log completion BEFORE yielding the final marker: a consumer that stops
+      // iterating once it sees `isFinal` closes the generator at that yield,
+      // so any statement placed after it would never execute.
+      this.logOperationComplete("tts.synthesizeStream", {
+        model: options.model,
+        totalBytes
+      });
+      yield { audio: Buffer.alloc(0), isFinal: true };
+    } catch (error) {
+      this.handleError(error);
+      throw error;
+    }
+  }
   /**
    * List available voices (returns static list for OpenAI)
    */
@@ -35009,6 +35075,35 @@ var TextToSpeech = class _TextToSpeech {
     const response = await this.synthesize(text, options);
     await fs17__namespace.writeFile(filePath, response.audio);
   }
+  // ======================== Streaming Methods ========================
+  /**
+   * Ask the underlying provider whether it can stream audio in `format`.
+   * Returns false when the provider has no `supportsStreaming` method;
+   * otherwise returns whatever the provider's method returns.
+   */
+  supportsStreaming(format) {
+    const { provider } = this;
+    if (typeof provider.supportsStreaming !== "function") {
+      return false;
+    }
+    return provider.supportsStreaming(format);
+  }
+  /**
+   * Stream synthesized audio for `text`.
+   *
+   * Per-call `options` (voice, format, speed, vendorOptions) take precedence
+   * over the instance config. When the provider exposes `synthesizeStream`
+   * and reports the format as streamable, its chunks are forwarded as-is;
+   * otherwise the audio is synthesized in one request and yielded as a
+   * single chunk with `isFinal: true`.
+   */
+  async *synthesizeStream(text, options) {
+    const overrides = options ?? {};
+    const request = {
+      model: this.config.model ?? this.getDefaultModel(),
+      input: text,
+      voice: overrides.voice ?? this.config.voice ?? this.getDefaultVoice(),
+      format: overrides.format ?? this.config.format,
+      speed: overrides.speed ?? this.config.speed,
+      vendorOptions: overrides.vendorOptions
+    };
+    const backend = this.provider;
+    const canStream = typeof backend.synthesizeStream === "function" && backend.supportsStreaming?.(request.format);
+    if (!canStream) {
+      // Buffered fallback: one request, one chunk.
+      const buffered = await this.provider.synthesize(request);
+      yield { audio: buffered.audio, isFinal: true };
+      return;
+    }
+    yield* backend.synthesizeStream(request);
+  }
   // ======================== Introspection Methods ========================
   /**
    * Get model information for current or specified model
@@ -38713,6 +38808,694 @@ var VideoGeneration = class _VideoGeneration {
   }
 };
+// src/capabilities/speech/SentenceSplitter.ts
+// Lower-case abbreviations (trailing period included) whose final "." is not
+// treated as a sentence end. The rows are only a visual grouping; the Set
+// holds the entries in the order written here.
+var DEFAULT_ABBREVIATIONS = /* @__PURE__ */ new Set([
+  ...["dr.", "mr.", "mrs.", "ms.", "prof.", "sr.", "jr."],
+  ...["st.", "ave.", "blvd.", "rd."],
+  ...["u.s.", "u.k.", "u.s.a.", "u.n."],
+  ...["e.g.", "i.e.", "etc.", "vs.", "viz."],
+  ...["approx.", "dept.", "est.", "inc.", "ltd.", "corp."],
+  ...["no.", "vol.", "rev.", "gen.", "gov."],
+  ...["jan.", "feb.", "mar.", "apr.", "jun.", "jul.", "aug.", "sep.", "oct.", "nov.", "dec."],
+  ...["fig.", "eq.", "ref.", "sec.", "ch."],
+  ...["min.", "max.", "avg."]
+]);
+// Defaults merged under caller-supplied options by SentenceChunkingStrategy.
+// Frozen (including the nested array) because the constructor's shallow
+// spread hands the very same `additionalAbbreviations` array to every
+// instance that does not override it; a mutation would leak across instances.
+var DEFAULT_OPTIONS = Object.freeze({
+  // Sentences/chunks shorter than this are not emitted on their own.
+  minChunkLength: 20,
+  // Buffered text longer than this is split at clause boundaries.
+  maxChunkLength: 500,
+  // Drop text inside ``` fenced code blocks.
+  skipCodeBlocks: true,
+  // Remove markdown syntax from emitted chunks.
+  stripMarkdown: true,
+  // Extra abbreviations (matched case-insensitively) added to the built-in set.
+  additionalAbbreviations: Object.freeze([])
+});
+/**
+ * Incrementally splits streamed text into speakable chunks.
+ *
+ * `feed(delta)` appends text and returns every chunk that became complete
+ * (paragraph breaks, sentence boundaries, forced splits of over-long text);
+ * `flush()` returns whatever is left. Fenced code blocks can be skipped and
+ * markdown syntax stripped, depending on the options.
+ */
+var SentenceChunkingStrategy = class {
+  // Speakable text received so far that has not been emitted as a chunk yet.
+  buffer = "";
+  // True while the stream position is inside a ``` fenced code block.
+  inCodeBlock = false;
+  // Text of the code block currently being skipped; cleared at every fence.
+  codeBlockBuffer = "";
+  // One or two trailing backticks held back until the next delta shows
+  // whether they are the start of a ``` fence.
+  fenceCarry = "";
+  options;
+  abbreviations;
+  /**
+   * @param options - partial overrides for DEFAULT_OPTIONS.
+   */
+  constructor(options) {
+    this.options = { ...DEFAULT_OPTIONS, ...options };
+    this.abbreviations = /* @__PURE__ */ new Set([
+      ...DEFAULT_ABBREVIATIONS,
+      ...this.options.additionalAbbreviations.map((a) => a.toLowerCase())
+    ]);
+  }
+  /**
+   * Append a text delta and return the chunks that are now complete.
+   *
+   * Code fences are filtered out of the incoming delta only. Text that is
+   * already buffered was classified when it arrived, so it must not be
+   * rescanned with the current `inCodeBlock` state: doing so would discard
+   * pending speech text whenever a code block spans several deltas.
+   */
+  feed(delta) {
+    this.buffer += this.options.skipCodeBlocks ? this.stripCodeBlocks(delta) : delta;
+    return this.extractChunks();
+  }
+  /**
+   * Return the remaining buffered text (cleaned), or null when nothing is
+   * left. Any unterminated code block is abandoned.
+   */
+  flush() {
+    if (!this.inCodeBlock) {
+      // Held-back backticks turned out not to be a fence; they are plain text.
+      this.buffer += this.fenceCarry;
+    }
+    this.fenceCarry = "";
+    this.inCodeBlock = false;
+    this.codeBlockBuffer = "";
+    const text = this.cleanForSpeech(this.buffer.trim());
+    this.buffer = "";
+    return text.length > 0 ? text : null;
+  }
+  /**
+   * Drop all buffered text and code-block state.
+   */
+  reset() {
+    this.buffer = "";
+    this.inCodeBlock = false;
+    this.codeBlockBuffer = "";
+    this.fenceCarry = "";
+  }
+  // ======================== Private Methods ========================
+  /**
+   * Pull complete chunks out of the buffer: first whole paragraphs (split on
+   * blank lines), then sentences, then forced splits when the remainder is
+   * longer than maxChunkLength. Short chunks are merged before returning.
+   */
+  extractChunks() {
+    const chunks = [];
+    let paragraphIdx = this.buffer.indexOf("\n\n");
+    while (paragraphIdx !== -1) {
+      const chunk = this.buffer.slice(0, paragraphIdx).trim();
+      this.buffer = this.buffer.slice(paragraphIdx + 2);
+      if (chunk.length > 0) {
+        const cleaned = this.cleanForSpeech(chunk);
+        if (cleaned.length > 0) {
+          chunks.push(cleaned);
+        }
+      }
+      paragraphIdx = this.buffer.indexOf("\n\n");
+    }
+    let sentenceEnd = this.findSentenceBoundary();
+    while (sentenceEnd !== -1) {
+      const sentence = this.buffer.slice(0, sentenceEnd).trim();
+      this.buffer = this.buffer.slice(sentenceEnd).trimStart();
+      if (sentence.length > 0) {
+        const cleaned = this.cleanForSpeech(sentence);
+        if (cleaned.length > 0) {
+          chunks.push(cleaned);
+        }
+      }
+      sentenceEnd = this.findSentenceBoundary();
+    }
+    if (this.buffer.length > this.options.maxChunkLength) {
+      const splitChunks = this.splitLongText(this.buffer);
+      // The last piece may still grow, so it stays in the buffer.
+      this.buffer = splitChunks.pop() ?? "";
+      for (const chunk of splitChunks) {
+        const cleaned = this.cleanForSpeech(chunk.trim());
+        if (cleaned.length > 0) {
+          chunks.push(cleaned);
+        }
+      }
+    }
+    return this.mergeSmallChunks(chunks);
+  }
+  /**
+   * Filter fenced code blocks out of an incoming delta.
+   *
+   * Returns only the text that lies outside ``` fences. A fence toggles
+   * `inCodeBlock`, and the rest of the fence line (e.g. the language tag) is
+   * skipped. One or two backticks at the very end of the input are held in
+   * `fenceCarry`, so a fence split across two deltas is still recognised.
+   */
+  stripCodeBlocks(delta) {
+    const input = this.fenceCarry + delta;
+    this.fenceCarry = "";
+    let spoken = "";
+    let idx = 0;
+    while (idx < input.length) {
+      if (input.startsWith("```", idx)) {
+        this.inCodeBlock = !this.inCodeBlock;
+        this.codeBlockBuffer = "";
+        const newline = input.indexOf("\n", idx + 3);
+        idx = newline !== -1 ? newline + 1 : input.length;
+        continue;
+      }
+      const tailLength = input.length - idx;
+      if (tailLength < 3 && input[idx] === "`" && (tailLength === 1 || input[idx + 1] === "`")) {
+        // "`" or "``" at the end: possibly the first part of a fence.
+        this.fenceCarry = input.slice(idx);
+        break;
+      }
+      if (this.inCodeBlock) {
+        this.codeBlockBuffer += input[idx];
+      } else {
+        spoken += input[idx];
+      }
+      idx++;
+    }
+    return spoken;
+  }
+  /**
+   * Find the position right after the next sentence boundary.
+   * Returns -1 if no complete sentence boundary found.
+   *
+   * A boundary is ".", "?" or "!" followed by whitespace, where a "." is not
+   * part of an abbreviation, decimal number or ellipsis, and the text up to
+   * it is at least minChunkLength long.
+   */
+  findSentenceBoundary() {
+    const terminators = [".", "?", "!"];
+    for (let i = 0; i < this.buffer.length; i++) {
+      const ch = this.buffer.charAt(i);
+      if (!terminators.includes(ch)) continue;
+      // A terminator at the very end cannot be judged until more text arrives.
+      if (i + 1 >= this.buffer.length) return -1;
+      const nextChar = this.buffer[i + 1];
+      if (nextChar !== " " && nextChar !== "\n" && nextChar !== "\r" && nextChar !== "\t") {
+        continue;
+      }
+      if (ch === ".") {
+        if (this.isAbbreviation(i)) continue;
+        if (this.isDecimalNumber(i)) continue;
+        if (this.isEllipsis(i)) continue;
+      }
+      const candidate = this.buffer.slice(0, i + 1).trim();
+      if (candidate.length < this.options.minChunkLength) continue;
+      return i + 1;
+    }
+    return -1;
+  }
+  /**
+   * Check if the period at position `pos` is part of a known abbreviation.
+   * The word is delimited by any whitespace, matching the whitespace set
+   * that findSentenceBoundary accepts after a terminator.
+   */
+  isAbbreviation(pos) {
+    let wordStart = pos - 1;
+    while (wordStart >= 0 && !/\s/.test(this.buffer[wordStart])) {
+      wordStart--;
+    }
+    wordStart++;
+    const word = this.buffer.slice(wordStart, pos + 1).toLowerCase();
+    return this.abbreviations.has(word);
+  }
+  /**
+   * Check if the period at position `pos` is a decimal point.
+   * e.g., 3.14, $1.50
+   */
+  isDecimalNumber(pos) {
+    if (pos === 0 || pos + 1 >= this.buffer.length) return false;
+    const before = this.buffer.charAt(pos - 1);
+    const after = this.buffer.charAt(pos + 1);
+    return /\d/.test(before) && /\d/.test(after);
+  }
+  /**
+   * Check if the period at position `pos` is part of an ellipsis (...).
+   */
+  isEllipsis(pos) {
+    if (pos >= 2 && this.buffer[pos - 1] === "." && this.buffer[pos - 2] === ".") return true;
+    if (pos + 1 < this.buffer.length && this.buffer[pos + 1] === ".") return true;
+    return false;
+  }
+  /**
+   * Split text that exceeds maxChunkLength at clause boundaries.
+   * Falls back to the last space, then to a hard cut at the limit.
+   * The final element of the result is the (possibly short) remainder.
+   */
+  splitLongText(text) {
+    // A limit below 1 would produce empty slices and never shrink `remaining`.
+    const max = Math.max(1, this.options.maxChunkLength);
+    const chunks = [];
+    let remaining = text;
+    while (remaining.length > max) {
+      let splitPos = -1;
+      const clauseBreaks = [",", ";", ":", " \u2014", " \u2013", " -"];
+      for (const brk of clauseBreaks) {
+        const searchRegion = remaining.slice(0, max);
+        const lastPos = searchRegion.lastIndexOf(brk);
+        if (lastPos > this.options.minChunkLength) {
+          splitPos = lastPos + brk.length;
+          break;
+        }
+      }
+      if (splitPos === -1) {
+        const searchRegion = remaining.slice(0, max);
+        splitPos = searchRegion.lastIndexOf(" ");
+        if (splitPos <= this.options.minChunkLength) {
+          splitPos = max;
+        }
+      }
+      chunks.push(remaining.slice(0, splitPos));
+      remaining = remaining.slice(splitPos);
+    }
+    chunks.push(remaining);
+    return chunks;
+  }
+  /**
+   * Merge chunks that are shorter than minChunkLength with the next chunk.
+   * A short trailing remainder is appended to the previous merged chunk.
+   */
+  mergeSmallChunks(chunks) {
+    if (chunks.length <= 1) return chunks;
+    const merged = [];
+    let accumulator = "";
+    for (const chunk of chunks) {
+      if (accumulator.length > 0) {
+        accumulator += " " + chunk;
+      } else {
+        accumulator = chunk;
+      }
+      if (accumulator.length >= this.options.minChunkLength) {
+        merged.push(accumulator);
+        accumulator = "";
+      }
+    }
+    if (accumulator.length > 0) {
+      if (merged.length > 0) {
+        merged[merged.length - 1] += " " + accumulator;
+      } else {
+        merged.push(accumulator);
+      }
+    }
+    return merged;
+  }
+  /**
+   * Strip markdown formatting from text for natural speech.
+   * Returns the text unchanged when `stripMarkdown` is off.
+   */
+  cleanForSpeech(text) {
+    if (!this.options.stripMarkdown) return text;
+    let cleaned = text;
+    cleaned = cleaned.replace(/`([^`]+)`/g, "$1");
+    cleaned = cleaned.replace(/\*\*([^*]+)\*\*/g, "$1");
+    cleaned = cleaned.replace(/__([^_]+)__/g, "$1");
+    cleaned = cleaned.replace(/(?<!\*)\*([^*]+)\*(?!\*)/g, "$1");
+    cleaned = cleaned.replace(/(?<!_)_([^_]+)_(?!_)/g, "$1");
+    cleaned = cleaned.replace(/~~([^~]+)~~/g, "$1");
+    // Images must be removed before links: the link pattern also matches the
+    // "[alt](url)" part of "![alt](url)" and would leave "!alt" behind.
+    cleaned = cleaned.replace(/!\[([^\]]*)\]\([^)]+\)/g, "");
+    cleaned = cleaned.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1");
+    cleaned = cleaned.replace(/^#{1,6}\s+/gm, "");
+    cleaned = cleaned.replace(/^[-*+]\s+/gm, "");
+    cleaned = cleaned.replace(/^\d+\.\s+/gm, "");
+    cleaned = cleaned.replace(/^>\s+/gm, "");
+    cleaned = cleaned.replace(/^[-*_]{3,}\s*$/gm, "");
+    cleaned = cleaned.replace(/\n+/g, " ");
+    cleaned = cleaned.replace(/\s{2,}/g, " ");
+    return cleaned.trim();
+  }
+};
+// src/capabilities/speech/VoiceStream.ts
+/**
+ * Wraps an agent text stream and interleaves TTS audio events into it.
+ *
+ * Text deltas are fed to a chunker; every completed chunk is synthesized in
+ * the background (bounded by `maxConcurrentTTS` and `maxQueuedChunks`) and
+ * the resulting audio events are yielded between the original events.
+ *
+ * Emits: "audio:ready", "audio:error", "audio:complete", "audio:interrupted".
+ */
+var VoiceStream = class _VoiceStream extends events.EventEmitter {
+  tts;
+  chunker;
+  format;
+  speed;
+  maxConcurrentTTS;
+  maxQueuedChunks;
+  vendorOptions;
+  streaming;
+  // Pipeline state
+  chunkIndex = 0;
+  totalCharacters = 0;
+  totalDuration = 0;
+  activeJobs = /* @__PURE__ */ new Map();
+  activeTTSCount = 0;
+  interrupted = false;
+  lastResponseId = "";
+  _isDestroyed = false;
+  // Bumped by reset() and interrupt(). A job captures the value when it is
+  // created; a mismatch later means the job belongs to an abandoned run and
+  // must neither publish results nor touch the (already re-zeroed) counters.
+  generation = 0;
+  // Semaphore for TTS concurrency control
+  slotWaiters = [];
+  // Audio event buffer for interleaving with text events
+  audioEventBuffer = [];
+  // Async notification: resolves when new events are pushed to audioEventBuffer
+  bufferNotify = null;
+  // Queue backpressure
+  queueWaiters = [];
+  /**
+   * Create a new VoiceStream instance
+   */
+  static create(config) {
+    return new _VoiceStream(config);
+  }
+  /**
+   * @param config - reads ttsConnector, ttsModel, voice, chunkingStrategy,
+   *   chunkingOptions, format ("mp3" by default), speed (1), maxConcurrentTTS
+   *   (2), maxQueuedChunks (5), vendorOptions and streaming (false).
+   */
+  constructor(config) {
+    super();
+    this.tts = TextToSpeech.create({
+      connector: config.ttsConnector,
+      model: config.ttsModel,
+      voice: config.voice
+    });
+    this.chunker = config.chunkingStrategy ?? new SentenceChunkingStrategy(config.chunkingOptions);
+    this.format = config.format ?? "mp3";
+    this.speed = config.speed ?? 1;
+    // A limit below 1 could never be satisfied and would block forever.
+    this.maxConcurrentTTS = Math.max(1, config.maxConcurrentTTS ?? 2);
+    this.maxQueuedChunks = Math.max(1, config.maxQueuedChunks ?? 5);
+    this.vendorOptions = config.vendorOptions;
+    this.streaming = config.streaming ?? false;
+  }
+  // ======================== Public API ========================
+  /**
+   * Transform an agent text stream into an augmented stream with audio events.
+   * Original text events pass through unchanged; audio events are interleaved.
+   *
+   * The generator yields events in this order:
+   * 1. All original StreamEvents (pass-through)
+   * 2. AudioChunkReady/AudioChunkError events as TTS completes
+   * 3. AudioStreamComplete as the final audio event
+   */
+  async *wrap(textStream) {
+    this.reset();
+    try {
+      for await (const event of textStream) {
+        yield event;
+        if (event.response_id) {
+          this.lastResponseId = event.response_id;
+        }
+        if (event.type === "response.output_text.delta" /* OUTPUT_TEXT_DELTA */ && !this.interrupted) {
+          const completedChunks = this.chunker.feed(event.delta);
+          for (const chunk of completedChunks) {
+            await this.scheduleTTS(chunk);
+          }
+        }
+        if ((event.type === "response.output_text.done" /* OUTPUT_TEXT_DONE */ || event.type === "response.complete" /* RESPONSE_COMPLETE */) && !this.interrupted) {
+          const remaining = this.chunker.flush();
+          if (remaining) {
+            await this.scheduleTTS(remaining);
+          }
+        }
+        yield* this.drainAudioBuffer();
+      }
+      // Text is exhausted; keep yielding audio until every job has finished.
+      while (this.activeJobs.size > 0 || this.audioEventBuffer.length > 0) {
+        if (this.audioEventBuffer.length === 0) {
+          await Promise.race([
+            this.waitForBufferNotify(),
+            ...Array.from(this.activeJobs.values()).map((j) => j.promise)
+          ]);
+        }
+        yield* this.drainAudioBuffer();
+      }
+      if (this.chunkIndex > 0) {
+        const completeEvent = {
+          type: "response.audio_stream.complete" /* AUDIO_STREAM_COMPLETE */,
+          response_id: this.lastResponseId,
+          total_chunks: this.chunkIndex,
+          total_characters: this.totalCharacters,
+          total_duration_seconds: this.totalDuration > 0 ? this.totalDuration : void 0
+        };
+        yield completeEvent;
+        this.emit("audio:complete", {
+          totalChunks: this.chunkIndex,
+          totalDurationSeconds: this.totalDuration > 0 ? this.totalDuration : void 0
+        });
+      }
+    } finally {
+      this.cleanup();
+    }
+  }
+  /**
+   * Interrupt audio generation. Cancels pending TTS and flushes queue.
+   * Call this when the user sends a new message mid-speech.
+   * Requests already in flight are not aborted; their results are discarded.
+   */
+  interrupt() {
+    this.interrupted = true;
+    this.generation++;
+    const pendingCount = this.activeJobs.size;
+    this.activeJobs.clear();
+    this.activeTTSCount = 0;
+    this.audioEventBuffer = [];
+    this.releaseAllWaiters();
+    this.chunker.reset();
+    this.emit("audio:interrupted", { pendingChunks: pendingCount });
+  }
+  /**
+   * Reset state for a new stream. Called automatically by wrap().
+   */
+  reset() {
+    // Invalidate jobs of the previous run first, then wake anything still
+    // waiting so those jobs can observe the new generation and exit.
+    this.generation++;
+    this.releaseAllWaiters();
+    this.chunkIndex = 0;
+    this.totalCharacters = 0;
+    this.totalDuration = 0;
+    this.activeJobs.clear();
+    this.activeTTSCount = 0;
+    this.interrupted = false;
+    this.lastResponseId = "";
+    this.audioEventBuffer = [];
+    this.bufferNotify = null;
+    this.chunker.reset();
+  }
+  /**
+   * Interrupt any running stream, mark the instance destroyed and remove all
+   * event listeners.
+   */
+  destroy() {
+    this.interrupt();
+    this._isDestroyed = true;
+    this.removeAllListeners();
+  }
+  get isDestroyed() {
+    return this._isDestroyed;
+  }
+  // ======================== Private Methods ========================
+  /**
+   * Schedule a text chunk for TTS synthesis.
+   * Awaits a free queue slot if backpressure is active (lossless).
+   */
+  async scheduleTTS(text) {
+    if (this.interrupted || this._isDestroyed) return;
+    const cleanText = text.trim();
+    if (cleanText.length === 0) return;
+    const generation = this.generation;
+    while (this.activeJobs.size >= this.maxQueuedChunks && !this.interrupted && generation === this.generation) {
+      await this.waitForQueueSlot();
+    }
+    if (this.interrupted || generation !== this.generation) return;
+    const index = this.chunkIndex++;
+    this.totalCharacters += cleanText.length;
+    const job = {
+      index,
+      text: cleanText,
+      promise: this.executeTTS(index, cleanText)
+    };
+    this.activeJobs.set(index, job);
+    const onSettled = () => {
+      // Only remove our own entry: after a reset the same index may already
+      // belong to a job of the next stream.
+      if (this.activeJobs.get(index) === job) {
+        this.activeJobs.delete(index);
+      }
+      this.releaseQueueWaiter();
+    };
+    // then(f, f) instead of finally(): bookkeeping must not create an
+    // unhandled rejection if the job promise rejects.
+    job.promise.then(onSettled, onSettled);
+  }
+  /**
+   * Execute TTS for a single text chunk.
+   * Respects concurrency semaphore.
+   * Branches on streaming mode: pushes sub-chunks or a single buffered chunk.
+   */
+  async executeTTS(index, text) {
+    const generation = this.generation;
+    const isStale = () => this.interrupted || generation !== this.generation;
+    while (this.activeTTSCount >= this.maxConcurrentTTS && !isStale()) {
+      await this.waitForTTSSlot();
+    }
+    if (isStale()) return;
+    this.activeTTSCount++;
+    try {
+      if (this.streaming && this.tts.supportsStreaming(this.format)) {
+        let subIndex = 0;
+        // mp3 is requested as pcm when streaming; other formats pass through.
+        const streamFormat = this.format === "mp3" ? "pcm" : this.format;
+        // Small provider chunks are coalesced until at least this many bytes.
+        const MIN_BUFFER_BYTES = 6e3;
+        const pendingBuffers = [];
+        let pendingSize = 0;
+        const flushPending = () => {
+          if (pendingSize === 0) return;
+          const merged = Buffer.concat(pendingBuffers, pendingSize);
+          pendingBuffers.length = 0;
+          pendingSize = 0;
+          const currentSubIndex = subIndex++;
+          const audioEvent = {
+            type: "response.audio_chunk.ready" /* AUDIO_CHUNK_READY */,
+            response_id: this.lastResponseId,
+            chunk_index: index,
+            sub_index: currentSubIndex,
+            // The chunk text is attached to the first sub-chunk only.
+            text: currentSubIndex === 0 ? text : "",
+            audio_base64: merged.toString("base64"),
+            format: streamFormat
+          };
+          this.pushAudioEvent(audioEvent);
+        };
+        for await (const chunk of this.tts.synthesizeStream(text, {
+          format: streamFormat,
+          speed: this.speed,
+          vendorOptions: this.vendorOptions
+        })) {
+          if (isStale()) return;
+          if (chunk.audio.length > 0) {
+            pendingBuffers.push(chunk.audio);
+            pendingSize += chunk.audio.length;
+            if (pendingSize >= MIN_BUFFER_BYTES) {
+              flushPending();
+            }
+          }
+          if (chunk.isFinal) {
+            break;
+          }
+        }
+        flushPending();
+        this.emit("audio:ready", { chunkIndex: index, text });
+      } else {
+        const response = await this.tts.synthesize(text, {
+          format: this.format,
+          speed: this.speed,
+          vendorOptions: this.vendorOptions
+        });
+        if (isStale()) return;
+        if (response.durationSeconds) {
+          this.totalDuration += response.durationSeconds;
+        }
+        const audioEvent = {
+          type: "response.audio_chunk.ready" /* AUDIO_CHUNK_READY */,
+          response_id: this.lastResponseId,
+          chunk_index: index,
+          text,
+          audio_base64: response.audio.toString("base64"),
+          format: response.format,
+          duration_seconds: response.durationSeconds,
+          characters_used: response.charactersUsed
+        };
+        this.pushAudioEvent(audioEvent);
+        this.emit("audio:ready", {
+          chunkIndex: index,
+          text,
+          durationSeconds: response.durationSeconds
+        });
+      }
+    } catch (error) {
+      if (isStale()) return;
+      const errorEvent = {
+        type: "response.audio_chunk.error" /* AUDIO_CHUNK_ERROR */,
+        response_id: this.lastResponseId,
+        chunk_index: index,
+        text,
+        // Thrown values are not guaranteed to be Error instances.
+        error: error instanceof Error ? error.message : String(error)
+      };
+      this.pushAudioEvent(errorEvent);
+      this.emit("audio:error", {
+        chunkIndex: index,
+        text,
+        error
+      });
+    } finally {
+      // interrupt()/reset() already zeroed the counter for older generations;
+      // decrementing again would drive it negative and lift the limit.
+      if (generation === this.generation) {
+        this.activeTTSCount--;
+        this.releaseTTSSlot();
+      }
+    }
+  }
+  /**
+   * Drain the audio event buffer, yielding all ready events.
+   */
+  *drainAudioBuffer() {
+    while (this.audioEventBuffer.length > 0) {
+      yield this.audioEventBuffer.shift();
+    }
+  }
+  // ======================== Buffer Notification ========================
+  /**
+   * Push an audio event and wake up the consumer in wrap()
+   */
+  pushAudioEvent(event) {
+    this.audioEventBuffer.push(event);
+    if (this.bufferNotify) {
+      this.bufferNotify();
+      this.bufferNotify = null;
+    }
+  }
+  /**
+   * Wait until a new event is pushed to the audio buffer
+   */
+  waitForBufferNotify() {
+    return new Promise((resolve4) => {
+      this.bufferNotify = resolve4;
+    });
+  }
+  // ======================== Semaphore / Backpressure ========================
+  /** Resolves when a TTS concurrency slot is released. */
+  waitForTTSSlot() {
+    return new Promise((resolve4) => {
+      this.slotWaiters.push(resolve4);
+    });
+  }
+  /** Wake the oldest job waiting for a TTS concurrency slot, if any. */
+  releaseTTSSlot() {
+    const waiter = this.slotWaiters.shift();
+    if (waiter) waiter();
+  }
+  /** Resolves when a job leaves the queue. */
+  waitForQueueSlot() {
+    return new Promise((resolve4) => {
+      this.queueWaiters.push(resolve4);
+    });
+  }
+  /** Wake the oldest scheduler waiting for queue space, if any. */
+  releaseQueueWaiter() {
+    const waiter = this.queueWaiters.shift();
+    if (waiter) waiter();
+  }
+  /** Wake every waiter (slot, queue, buffer) so blocked code can re-check state. */
+  releaseAllWaiters() {
+    for (const waiter of this.slotWaiters) waiter();
+    this.slotWaiters = [];
+    for (const waiter of this.queueWaiters) waiter();
+    this.queueWaiters = [];
+    if (this.bufferNotify) {
+      this.bufferNotify();
+      this.bufferNotify = null;
+    }
+  }
+  /** Called when wrap() finishes or is abandoned by its consumer. */
+  cleanup() {
+    this.releaseAllWaiters();
+  }
+};
+// src/capabilities/speech/AudioPlaybackQueue.ts
+/**
+ * Re-orders audio chunk events so the callback receives them in ascending
+ * `chunk_index` order, starting at 0.
+ *
+ * Events are keyed by `chunk_index` alone: a second event with the same
+ * index replaces a buffered one, and an event whose index is below
+ * `nextExpectedIndex` is stored but never delivered.
+ */
+var AudioPlaybackQueue = class {
+  // chunk_index -> event, for chunks that arrived ahead of their turn.
+  buffer = /* @__PURE__ */ new Map();
+  nextPlayIndex = 0;
+  onReady;
+  /**
+   * @param onReady - invoked once per event, in playback order.
+   */
+  constructor(onReady) {
+    this.onReady = onReady;
+  }
+  /**
+   * Store `event` under its chunk index, then hand over every event that is
+   * now next in line (this one and any consecutive buffered successors).
+   */
+  enqueue(event) {
+    const { chunk_index: slot } = event;
+    this.buffer.set(slot, event);
+    this.drain();
+  }
+  /**
+   * Forget all buffered events and start expecting index 0 again
+   * (e.g., on interruption or new stream).
+   */
+  reset() {
+    this.buffer.clear();
+    this.nextPlayIndex = 0;
+  }
+  /**
+   * How many events are currently held in the buffer.
+   */
+  get pendingCount() {
+    return this.buffer.size;
+  }
+  /**
+   * Index of the chunk that will be delivered next.
+   */
+  get nextExpectedIndex() {
+    return this.nextPlayIndex;
+  }
+  // ======================== Private ========================
+  /**
+   * Deliver buffered events for as long as the next expected index is present.
+   * State is advanced before the callback runs, and the index is re-read on
+   * every pass in case the callback enqueues or resets.
+   */
+  drain() {
+    for (let cursor = this.nextPlayIndex; this.buffer.has(cursor); cursor = this.nextPlayIndex) {
+      const ready = this.buffer.get(cursor);
+      this.buffer.delete(cursor);
+      this.nextPlayIndex = cursor + 1;
+      this.onReady(ready);
+    }
+  }
+};
 // src/capabilities/search/SearchProvider.ts
 init_Connector();
@@ -55753,6 +56536,7 @@ exports.APPROVAL_STATE_VERSION = APPROVAL_STATE_VERSION;
 exports.Agent = Agent;
 exports.AgentContextNextGen = AgentContextNextGen;
 exports.ApproximateTokenEstimator = ApproximateTokenEstimator;
+exports.AudioPlaybackQueue = AudioPlaybackQueue;
 exports.BaseMediaProvider = BaseMediaProvider;
 exports.BasePluginNextGen = BasePluginNextGen;
 exports.BaseProvider = BaseProvider;
@@ -55847,6 +56631,7 @@ exports.STT_MODELS = STT_MODELS;
 exports.STT_MODEL_REGISTRY = STT_MODEL_REGISTRY;
 exports.ScrapeProvider = ScrapeProvider;
 exports.SearchProvider = SearchProvider;
+exports.SentenceChunkingStrategy = SentenceChunkingStrategy;
 exports.SerperProvider = SerperProvider;
 exports.Services = Services;
 exports.SimpleScheduler = SimpleScheduler;
@@ -55882,6 +56667,7 @@ exports.VIDEO_MODELS = VIDEO_MODELS;
 exports.VIDEO_MODEL_REGISTRY = VIDEO_MODEL_REGISTRY;
 exports.Vendor = Vendor;
 exports.VideoGeneration = VideoGeneration;
+exports.VoiceStream = VoiceStream;
 exports.WorkingMemory = WorkingMemory;
 exports.WorkingMemoryPluginNextGen = WorkingMemoryPluginNextGen;
 exports.addJitter = addJitter;
@@ -56070,6 +56856,9 @@ exports.grep = grep;
 exports.hasClipboardImage = hasClipboardImage;
 exports.hasVendorLogo = hasVendorLogo;
 exports.hydrateCustomTool = hydrateCustomTool;
+exports.isAudioChunkError = isAudioChunkError;
+exports.isAudioChunkReady = isAudioChunkReady;
+exports.isAudioStreamComplete = isAudioStreamComplete;
 exports.isBlockedCommand = isBlockedCommand;
 exports.isErrorEvent = isErrorEvent;
 exports.isExcludedExtension = isExcludedExtension;