PyPI - ai-interview-assistant - Versions diffs - 2.2.2__tar.gz → 2.2.4__tar.gz - Mend

ai-interview-assistant 2.2.2 (tar.gz) → 2.2.4 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.4}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-interview-assistant
-Version: 2.2.2
+Version: 2.2.4
 Summary: Ghost background AI assistant for live code challenges
 Requires-Python: >=3.11
 Requires-Dist: click>=8.0

{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.4}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "ai-interview-assistant"
-version = "2.2.2"
+version = "2.2.4"
 description = "Ghost background AI assistant for live code challenges"
 requires-python = ">=3.11"
 dependencies = [

{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.4}/src/ai_interview/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """AI Interview Assistant — ghost background tool for live code challenges."""
-__version__ = "2.2.2"
+__version__ = "2.2.4"

{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.4}/src/ai_interview/audio/capture.py RENAMED Viewed

@@ -285,12 +285,50 @@ class ScreenCaptureAudio:
 # Combined capture: mic + system audio
 # ---------------------------------------------------------------------------
+class _QueueAudioSource:
+    """Minimal capture-like adapter over a single Queue.
+    Exposes the same ``get_audio`` / ``flush`` surface a DeepgramTranscriber pulls
+    from, so a second transcriber can consume the candidate (mic) stream from its
+    own queue without knowing about CombinedAudioCapture internals.
+    """
+    def __init__(self, queue: "Queue") -> None:
+        self._queue = queue
+    def get_audio(self, timeout: float = 0.1) -> Optional[np.ndarray]:
+        try:
+            return self._queue.get(timeout=timeout)
+        except Empty:
+            return None
+    def flush(self) -> int:
+        dropped = 0
+        while True:
+            try:
+                self._queue.get_nowait()
+                dropped += 1
+            except Empty:
+                return dropped
 class CombinedAudioCapture:
     """Mixes microphone + system audio and feeds a Queue for the transcriber."""
+    # Each queued item is one 10ms chunk (see _mix_loop). Keep the transcriber in
+    # real-time phase: if the consumer (Deepgram feed loop) falls behind, drop the
+    # oldest chunks so emitted transcripts stay live instead of drifting late.
+    _STALL_TRIM_CHUNKS = 200   # ~2.0s of backlog triggers a trim
+    _STALL_TAIL_CHUNKS = 50    # leave ~0.5s of live tail after trimming
     def __init__(self, sample_rate: int = SAMPLE_RATE) -> None:
         self.sample_rate = sample_rate
         self.audio_queue: Queue[np.ndarray] = Queue()
+        # Separate queue carrying the microphone (candidate) stream when system
+        # audio is the primary source. Only fed while a mic transcriber is active
+        # (see _mic_routing) so it never grows unbounded when unused.
+        self.mic_queue: Queue[np.ndarray] = Queue()
+        self._mic_routing = False
         self._stop_event = Event()
         self._mic: Optional[MicrophoneCapture] = None
         self._sys: Optional[ScreenCaptureAudio] = None
@@ -323,58 +361,155 @@ class CombinedAudioCapture:
         only the interviewer's voice should be transcribed automatically.
         The microphone is only used for manual recordings (2x ESC hold).
         Falls back to mic-only if system audio is unavailable.
+        Latency design: each iteration drains the capture queues **fully** and
+        **non-blocking**, then emits **every** complete chunk. The previous design
+        blocked up to 20ms on the (discarded) mic queue and emitted only one chunk
+        per iteration — so whenever the loop dipped below the capture rate, audio
+        piled up in ``sys_buf`` (capped at 1s by its maxlen) and produced a steady,
+        invisible transcription lag. Draining fully keeps ``sys_buf`` near-empty so
+        audio reaches Deepgram in real time.
         """
         chunk_size = int(self.sample_rate * 0.01)  # 10ms — forward audio to Deepgram faster
         mic_buf: deque = deque(maxlen=self.sample_rate)
         sys_buf: deque = deque(maxlen=self.sample_rate)
-        _MAX_QUEUE_SIZE = self.sample_rate * 30  # ~30s of audio samples
         _last_drain_warn = 0.0
+        _trim_events = 0   # trims since last log — distinguishes a one-off spike
+        _trim_chunks = 0   # chunks dropped since last log  from a chronic stall
         while not self._stop_event.is_set():
-            # Always drain mic queue to prevent buffer buildup, but discard when system audio is active
-            mic_chunk = self._mic.get_audio(timeout=0.02) if self._mic else None
-            if mic_chunk is not None and not self._has_system_audio:
-                mic_buf.extend(mic_chunk.flatten())
+            # Drain the mic queue fully (non-blocking) so it never backs up. Keep
+            # the samples when mic is the fallback primary, or when a mic
+            # transcriber is routing the candidate stream; otherwise drain-and-
+            # discard.
+            if self._mic:
+                keep_mic = (not self._has_system_audio) or self._mic_routing
+                while True:
+                    mic_chunk = self._mic.get_audio(timeout=0)
+                    if mic_chunk is None:
+                        break
+                    if keep_mic:
+                        mic_buf.extend(mic_chunk.flatten())
+            # Drain the system-audio queue fully (non-blocking).
             if self._sys and self._sys.is_running:
-                sys_chunk = self._sys.get_audio(timeout=0.02)
-                if sys_chunk is not None:
+                while True:
+                    sys_chunk = self._sys.get_audio(timeout=0)
+                    if sys_chunk is None:
+                        break
                     sys_buf.extend(sys_chunk.flatten())
+            # Emit EVERY complete chunk this iteration (system audio decoupled from
+            # mic — it never waits on the mic having a chunk ready).
             if self._has_system_audio:
-                # System audio only — speaker voice, no mic bleed
-                if len(sys_buf) >= chunk_size:
-                    sys_data = np.array(
-                        [sys_buf.popleft() for _ in range(min(chunk_size, len(sys_buf)))],
-                        dtype=np.float32,
-                    )
-                    self.audio_queue.put(sys_data)
+                produced = self._emit_all_chunks(sys_buf, chunk_size, self.audio_queue)
+                # Candidate's own voice → separate queue for the mic transcriber.
+                if self._mic_routing:
+                    produced += self._emit_all_chunks(mic_buf, chunk_size, self.mic_queue)
             else:
-                # Fallback: mic only (no system audio available)
-                if len(mic_buf) >= chunk_size:
-                    mic_data = np.array(
-                        [mic_buf.popleft() for _ in range(min(chunk_size, len(mic_buf)))],
-                        dtype=np.float32,
-                    )
-                    self.audio_queue.put(mic_data)
+                produced = self._emit_all_chunks(mic_buf, chunk_size, self.audio_queue)
+            # Stall-recovery: if the downstream consumer (Deepgram feed loop) has
+            # fallen behind real-time, trim oldest queued chunks so transcripts stay
+            # live. Aggregate over a 30s window so a chronic stall (many trims) is
+            # distinguishable from a one-off spike and escalates to WARNING.
+            trimmed = self._trim_stale_backlog()
+            if trimmed:
+                _trim_events += 1
+                _trim_chunks += trimmed
+            # Bound the secondary (mic) queue too — if its transcriber dies (no
+            # Whisper fallback) routing stays on, so without this it would grow
+            # without limit. Same drop-oldest-to-stay-live policy as the primary.
+            if self._mic_routing:
+                self._trim_queue(self.mic_queue)
+            now = time.time()
+            if _trim_events and now - _last_drain_warn > 30:
+                import logging
+                _log = logging.getLogger(__name__)
+                _msg = ("Audio backlog trimmed %d times (~%.1fs dropped) in last 30s "
+                        "— transcriber feed behind real-time")
+                if _trim_events >= 50:
+                    _log.warning(_msg, _trim_events, _trim_chunks * 0.01)
+                else:
+                    _log.info(_msg, _trim_events, _trim_chunks * 0.01)
+                _last_drain_warn = now
+                _trim_events = 0
+                _trim_chunks = 0
+            # Yield the CPU only when there was nothing to forward — the gets above
+            # no longer block, so this prevents a busy-spin during silence while
+            # keeping zero added latency whenever audio is actively flowing.
+            if not produced:
+                time.sleep(0.005)
+    def _emit_all_chunks(self, buf: "deque", chunk_size: int, queue: "Queue") -> int:
+        """Pop every complete ``chunk_size`` window from ``buf`` and forward it to
+        ``queue``. Returns the number of chunks emitted.
+        Draining all complete chunks (not just one per call) is what keeps the
+        local buffer from accumulating a hidden backlog when the producer briefly
+        outpaces a single emit.
+        """
+        produced = 0
+        while len(buf) >= chunk_size:
+            data = np.array([buf.popleft() for _ in range(chunk_size)], dtype=np.float32)
+            queue.put(data)
+            produced += 1
+        return produced
-            # Watchdog: if output queue is growing too large, consumer is dead — flush it
-            qsize = self.audio_queue.qsize()
-            if qsize > _MAX_QUEUE_SIZE:
-                now = time.time()
-                if now - _last_drain_warn > 30:
-                    import logging
-                    logging.getLogger(__name__).warning(
-                        "Audio queue backlog: %d chunks (~%ds) — flushing to prevent memory leak",
-                        qsize, qsize // self.sample_rate,
-                    )
-                    _last_drain_warn = now
-                # Drain all but the last 1s
-                while self.audio_queue.qsize() > self.sample_rate:
-                    try:
-                        self.audio_queue.get_nowait()
-                    except Exception:
-                        break
+    @property
+    def has_system_audio(self) -> bool:
+        return self._has_system_audio
+    def enable_mic_routing(self) -> None:
+        """Begin routing the microphone (candidate) stream to ``mic_queue`` for a
+        secondary transcriber. Called once the mic Deepgram connection is up."""
+        self._mic_routing = True
+    def mic_source(self) -> "_QueueAudioSource":
+        """A capture-like view over ``mic_queue`` (get_audio + flush) to hand to a
+        second DeepgramTranscriber for the candidate's own voice."""
+        return _QueueAudioSource(self.mic_queue)
+    def _trim_queue(self, queue: "Queue") -> int:
+        """Drop oldest chunks from ``queue`` when it exceeds ~2s of backlog, down to
+        a ~0.5s tail. No-op at or below the threshold. Returns the count dropped.
+        A growing queue means its consumer (a transcriber feed loop) is behind
+        real-time — or, for the mic queue, has died (the secondary transcriber has
+        no Whisper fallback, so a dead Deepgram socket leaves no consumer). Either
+        way, bounding it keeps transcripts live and prevents unbounded growth.
+        """
+        if queue.qsize() <= self._STALL_TRIM_CHUNKS:
+            return 0
+        trimmed = 0
+        while queue.qsize() > self._STALL_TAIL_CHUNKS:
+            try:
+                queue.get_nowait()
+                trimmed += 1
+            except Empty:
+                break
+        return trimmed
+    def _trim_stale_backlog(self) -> int:
+        """Bound the primary transcriber queue (each item is one 10ms chunk)."""
+        return self._trim_queue(self.audio_queue)
+    def flush(self) -> int:
+        """Drop every queued audio chunk; return how many were dropped.
+        Called the moment the Deepgram WebSocket opens so audio captured during
+        the connect handshake — and during a reconnect's backoff, when the
+        producer keeps running while the socket is down — is not replayed as a
+        burst of stale, lagging transcripts ahead of live speech.
+        """
+        dropped = 0
+        while True:
+            try:
+                self.audio_queue.get_nowait()
+                dropped += 1
+            except Empty:
+                return dropped
     def stop(self) -> None:
         self._stop_event.set()

{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.4}/src/ai_interview/audio/transcriber.py RENAMED Viewed

@@ -37,11 +37,20 @@ class DeepgramTranscriber:
         transcript_buffer: "RollingTranscriptBuffer",
         api_key: str,
         language: str = "en",
+        label: str = "",
+        whisper_fallback: bool = True,
     ) -> None:
         self._capture = audio_capture
         self._buffer = transcript_buffer
         self._language = language
         self._api_key = api_key
+        # Secondary streams (e.g. the candidate's mic) set a label so their text
+        # is prefixed in the AI context (so the model can tell who spoke) and so
+        # they don't fight the primary stream on the live transcript bar.
+        self._label = label
+        # Secondary streams skip the Whisper fallback — one local Whisper model is
+        # enough; a second would double CPU/memory for little gain.
+        self._whisper_fallback = whisper_fallback
         self._stop_event = threading.Event()
         self._thread: Optional[threading.Thread] = None
         self._connection = None
@@ -50,6 +59,11 @@ class DeepgramTranscriber:
         self._last_transcript_time: float = 0.0
         self._last_interim = None
+    def _buffer_text(self, sentence: str) -> str:
+        """Prefix a labeled (secondary) stream's text so the AI context shows who
+        spoke, e.g. ``[me] ...`` for the candidate's own voice."""
+        return f"[{self._label}] {sentence}" if self._label else sentence
     def _on_message(self, sender, result=None, **kwargs):
         try:
             if result is None:
@@ -64,9 +78,10 @@ class DeepgramTranscriber:
             if result.is_final:
                 # Final: add to AI buffer and broadcast; clear pending interim
-                logger.info("Deepgram final: %s", sentence[:120])
+                logger.info("Deepgram final%s: %s",
+                            f" [{self._label}]" if self._label else "", sentence[:120])
                 self._last_interim = None
-                self._buffer.append(sentence)
+                self._buffer.append(self._buffer_text(sentence))
                 state.last_activity_at = time.time()
                 try:
                     from ai_interview.metrics import metrics
@@ -76,12 +91,14 @@ class DeepgramTranscriber:
                 if loop is not None:
                     import asyncio
                     asyncio.run_coroutine_threadsafe(
-                        self._broadcast_transcript(sentence, interim=False), loop
+                        self._broadcast_transcript(sentence, interim=False, source=self._label), loop
                     )
             else:
-                # Interim: track for utterance_end flush; broadcast to transcript bar
+                # Interim: track for utterance_end flush. Only the primary stream
+                # broadcasts interims — two streams replacing the same transcript
+                # bar in-place would flicker against each other.
                 self._last_interim = sentence
-                if loop is not None:
+                if loop is not None and not self._label:
                     import asyncio
                     asyncio.run_coroutine_threadsafe(
                         self._broadcast_transcript(sentence, interim=True), loop
@@ -90,10 +107,10 @@ class DeepgramTranscriber:
             logger.warning("Deepgram message parse error: %s", exc)
     @staticmethod
-    async def _broadcast_transcript(sentence: str, interim: bool = False) -> None:
+    async def _broadcast_transcript(sentence: str, interim: bool = False, source: str = "") -> None:
         try:
             from ai_interview.server.websocket import broadcast
-            await broadcast({"type": "transcript", "text": sentence, "interim": interim})
+            await broadcast({"type": "transcript", "text": sentence, "interim": interim, "source": source})
         except Exception:
             pass
@@ -103,13 +120,13 @@ class DeepgramTranscriber:
             last = getattr(self, '_last_interim', None)
             if last:
                 logger.info("Deepgram utterance_end flush: %s", last[:80])
-                self._buffer.append(last)
+                self._buffer.append(self._buffer_text(last))
                 from ai_interview.state import state
                 loop = state.asyncio_loop
                 if loop is not None:
                     import asyncio
                     asyncio.run_coroutine_threadsafe(
-                        self._broadcast_transcript(last, interim=False), loop
+                        self._broadcast_transcript(last, interim=False, source=self._label), loop
                     )
                 self._last_interim = None
         except Exception as exc:
@@ -197,8 +214,11 @@ class DeepgramTranscriber:
                     except Exception:
                         pass
-            logger.error("Deepgram reconnect failed after 10 attempts — falling back to local Whisper")
-            self._start_whisper_fallback()
+            if self._whisper_fallback:
+                logger.error("Deepgram reconnect failed after 10 attempts — falling back to local Whisper")
+                self._start_whisper_fallback()
+            else:
+                logger.error("Deepgram reconnect failed after 10 attempts (label=%s) — giving up, no Whisper fallback for secondary stream", self._label)
         finally:
             self._reconnect_lock.release()
@@ -368,6 +388,26 @@ class DeepgramTranscriber:
             self._connection = conn
             logger.info("Deepgram connection established")
+            # Drop audio captured during the connect handshake (and any reconnect
+            # backoff) so transcripts start aligned to live speech instead of
+            # replaying stale backlog. Covers initial connect and reconnect — both
+            # route through here. A capture stand-in without flush() is the only
+            # "expected" miss; anything else is a real fault and must be logged
+            # (a None/broken capture would otherwise crash the feed loop silently).
+            dropped = 0
+            try:
+                dropped = self._capture.flush()
+            except AttributeError:
+                logger.debug("Capture has no flush() — skipping pre-connect flush")
+            except Exception as exc:
+                logger.warning("Pre-connect flush failed: %s", exc)
+            if dropped:
+                logger.info(
+                    "Flushed %d pre-connect audio chunks (~%.1fs) to stay live",
+                    dropped, dropped * 0.01,
+                )
             try:
                 from ai_interview.metrics import metrics
                 metrics.record("deepgram_connect", val=duration_ms, ok=True)

{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.4}/src/ai_interview/daemon.py RENAMED Viewed

@@ -249,6 +249,37 @@ def run_daemon(config: Config) -> None:
     state.transcriber_name = transcriber_name
     state.ai_model = config.model
+    # Secondary transcriber: the candidate's own microphone on a SEPARATE Deepgram
+    # connection (per-role, like meeting-helper). The primary stream above carries
+    # system audio (interviewer); this adds the user's voice to the AI context,
+    # prefixed "[me]" so the model can tell the two apart. Only when system audio
+    # is the primary source (otherwise the mic already IS the primary), a Deepgram
+    # key is present, and not disabled via config.
+    mic_transcriber = None
+    if (
+        getattr(config, "transcribe_mic", True)
+        and isinstance(transcriber, DeepgramTranscriber)
+        and audio_capture.has_system_audio
+        and config.deepgram_api_key
+    ):
+        try:
+            mic_t = DeepgramTranscriber(
+                audio_capture.mic_source(),
+                state.transcript_buffer,
+                config.deepgram_api_key,
+                language=config.transcription_language,
+                label="me",
+                whisper_fallback=False,
+            )
+            if mic_t.start():
+                audio_capture.enable_mic_routing()
+                mic_transcriber = mic_t
+                logger.info("Mic (candidate) transcriber started on a separate Deepgram connection")
+            else:
+                logger.warning("Mic transcriber failed to start — continuing with interviewer audio only")
+        except Exception as exc:
+            logger.warning("Mic transcriber setup failed: %s — continuing without it", exc)
     # -----------------------------------------------------------------------
     # Start Datadog metrics + structured logging (no-op if dd_api_key absent)
     # -----------------------------------------------------------------------
@@ -363,6 +394,8 @@ def run_daemon(config: Config) -> None:
     finally:
         if transcriber is not None:
             transcriber.stop()
+        if mic_transcriber is not None:
+            mic_transcriber.stop()
         audio_capture.stop()
         # Clean up all screenshots captured during this session
         try:

{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.4}/src/ai_interview_assistant.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-interview-assistant
-Version: 2.2.2
+Version: 2.2.4
 Summary: Ghost background AI assistant for live code challenges
 Requires-Python: >=3.11
 Requires-Dist: click>=8.0

{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.4}/src/ai_interview_assistant.egg-info/SOURCES.txt RENAMED Viewed

@@ -40,4 +40,5 @@ src/ai_interview_assistant.egg-info/dependency_links.txt
 src/ai_interview_assistant.egg-info/entry_points.txt
 src/ai_interview_assistant.egg-info/requires.txt
 src/ai_interview_assistant.egg-info/top_level.txt
-tests/test_llm_clients.py
+tests/test_llm_clients.py
+tests/test_transcription_phase.py

ai_interview_assistant-2.2.4/tests/test_transcription_phase.py ADDED Viewed

@@ -0,0 +1,149 @@
+"""Unit tests for live-transcription phase management (no audio devices).
+Covers docs/perf/SPEC-transcription-phase.md: flush-on-(re)connect and the
+correctly-scaled stall-recovery trim. CombinedAudioCapture.__init__ only creates
+a Queue, so it is constructed directly without opening any device.
+"""
+from __future__ import annotations
+import numpy as np
+import pytest
+from ai_interview.audio.capture import CombinedAudioCapture
+def _chunk():
+    # One 10ms chunk at 16kHz = 160 float32 samples (shape mirrors _mix_loop output).
+    return np.zeros(160, dtype=np.float32)
+def _fill(cap, n):
+    for _ in range(n):
+        cap.audio_queue.put(_chunk())
+@pytest.fixture
+def cap():
+    return CombinedAudioCapture(sample_rate=16000)
+# --------------------------------------------------------------------------
+# flush()
+# --------------------------------------------------------------------------
+def test_flush_empties_queue_and_returns_count(cap):
+    _fill(cap, 37)
+    dropped = cap.flush()
+    assert dropped == 37
+    assert cap.audio_queue.qsize() == 0
+def test_flush_on_empty_queue_returns_zero(cap):
+    assert cap.flush() == 0
+    assert cap.audio_queue.qsize() == 0
+# --------------------------------------------------------------------------
+# _trim_stale_backlog()
+# --------------------------------------------------------------------------
+def test_trim_noop_below_threshold(cap):
+    _fill(cap, cap._STALL_TRIM_CHUNKS - 1)
+    assert cap._trim_stale_backlog() == 0
+    assert cap.audio_queue.qsize() == cap._STALL_TRIM_CHUNKS - 1
+def test_trim_noop_at_exact_threshold(cap):
+    _fill(cap, cap._STALL_TRIM_CHUNKS)
+    assert cap._trim_stale_backlog() == 0, "trim must not fire at exactly the threshold"
+    assert cap.audio_queue.qsize() == cap._STALL_TRIM_CHUNKS
+def test_trim_above_threshold_drops_to_tail(cap):
+    over = cap._STALL_TRIM_CHUNKS + 120
+    _fill(cap, over)
+    trimmed = cap._trim_stale_backlog()
+    assert cap.audio_queue.qsize() == cap._STALL_TAIL_CHUNKS
+    assert trimmed == over - cap._STALL_TAIL_CHUNKS
+def test_trim_keeps_newest_tail(cap):
+    # Mark chunks with an identifiable value so we can prove the OLDEST were dropped.
+    for i in range(cap._STALL_TRIM_CHUNKS + 10):
+        cap.audio_queue.put(np.full(1, i, dtype=np.float32))
+    cap._trim_stale_backlog()
+    remaining_first = cap.audio_queue.get_nowait()[0]
+    # The first surviving chunk must be one of the newer ones, not index 0.
+    assert remaining_first > 0
+# --------------------------------------------------------------------------
+# constants sanity
+# --------------------------------------------------------------------------
+def test_stall_constants_are_sane():
+    c = CombinedAudioCapture(sample_rate=16000)
+    assert c._STALL_TRIM_CHUNKS > c._STALL_TAIL_CHUNKS > 0
+    # 10ms per chunk: ~2s trigger, ~0.5s tail.
+    assert c._STALL_TRIM_CHUNKS * 0.01 == pytest.approx(2.0)
+    assert c._STALL_TAIL_CHUNKS * 0.01 == pytest.approx(0.5)
+# --------------------------------------------------------------------------
+# _emit_all_chunks — the latency fix: emit ALL complete chunks, not one
+# --------------------------------------------------------------------------
+def test_emit_all_chunks_drains_every_complete_chunk(cap):
+    from collections import deque
+    buf = deque([0.0] * (160 * 3 + 30))  # 3 full 10ms chunks + 30 leftover samples
+    produced = cap._emit_all_chunks(buf, 160, cap.audio_queue)
+    assert produced == 3, "must emit ALL complete chunks in one call, not just one"
+    assert cap.audio_queue.qsize() == 3
+    assert len(buf) == 30, "sub-chunk remainder stays buffered for next time"
+def test_emit_all_chunks_noop_below_chunk_size(cap):
+    from collections import deque
+    buf = deque([0.0] * 159)
+    assert cap._emit_all_chunks(buf, 160, cap.audio_queue) == 0
+    assert cap.audio_queue.qsize() == 0
+    assert len(buf) == 159
+def test_emit_all_chunks_targets_the_given_queue(cap):
+    from collections import deque
+    buf = deque([0.0] * 320)
+    cap._emit_all_chunks(buf, 160, cap.mic_queue)
+    assert cap.mic_queue.qsize() == 2
+    assert cap.audio_queue.qsize() == 0, "must write to the queue it was handed"
+# --------------------------------------------------------------------------
+# Separate mic stream: routing flag + queue adapter
+# --------------------------------------------------------------------------
+def test_enable_mic_routing_flag(cap):
+    assert cap._mic_routing is False
+    cap.enable_mic_routing()
+    assert cap._mic_routing is True
+def test_mic_source_get_and_flush(cap):
+    src = cap.mic_source()
+    assert src.get_audio(timeout=0.001) is None  # empty queue
+    cap.mic_queue.put(_chunk())
+    cap.mic_queue.put(_chunk())
+    assert src.get_audio(timeout=0.001) is not None
+    assert src.flush() == 1  # the one remaining chunk dropped
+    assert cap.mic_queue.qsize() == 0
+def test_trim_queue_bounds_mic_queue(cap):
+    # If the mic transcriber dies, routing stays on and mic_queue would grow
+    # unbounded — _trim_queue must cap it to the live tail like the primary.
+    over = cap._STALL_TRIM_CHUNKS + 100
+    for _ in range(over):
+        cap.mic_queue.put(_chunk())
+    trimmed = cap._trim_queue(cap.mic_queue)
+    assert cap.mic_queue.qsize() == cap._STALL_TAIL_CHUNKS
+    assert trimmed == over - cap._STALL_TAIL_CHUNKS