PyPI - ai-interview-assistant - Versions diffs - 2.2.2__tar.gz → 2.2.3__tar.gz - Mend

ai-interview-assistant 2.2.2 tar.gz → 2.2.3 tar.gz

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (46) hide show

{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-interview-assistant
-Version: 2.2.2
+Version: 2.2.3
 Summary: Ghost background AI assistant for live code challenges
 Requires-Python: >=3.11
 Requires-Dist: click>=8.0

{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "ai-interview-assistant"
-version = "2.2.2"
+version = "2.2.3"
 description = "Ghost background AI assistant for live code challenges"
 requires-python = ">=3.11"
 dependencies = [

{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """AI Interview Assistant — ghost background tool for live code challenges."""
-__version__ = "2.2.2"
+__version__ = "2.2.3"

{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/audio/capture.py RENAMED Viewed

@@ -288,6 +288,12 @@ class ScreenCaptureAudio:
 class CombinedAudioCapture:
     """Mixes microphone + system audio and feeds a Queue for the transcriber."""
+    # Each queued item is one 10ms chunk (see _mix_loop). Keep the transcriber in
+    # real-time phase: if the consumer (Deepgram feed loop) falls behind, drop the
+    # oldest chunks so emitted transcripts stay live instead of drifting late.
+    _STALL_TRIM_CHUNKS = 200   # ~2.0s of backlog triggers a trim
+    _STALL_TAIL_CHUNKS = 50    # leave ~0.5s of live tail after trimming
     def __init__(self, sample_rate: int = SAMPLE_RATE) -> None:
         self.sample_rate = sample_rate
         self.audio_queue: Queue[np.ndarray] = Queue()
@@ -327,8 +333,9 @@ class CombinedAudioCapture:
         chunk_size = int(self.sample_rate * 0.01)  # 10ms — forward audio to Deepgram faster
         mic_buf: deque = deque(maxlen=self.sample_rate)
         sys_buf: deque = deque(maxlen=self.sample_rate)
-        _MAX_QUEUE_SIZE = self.sample_rate * 30  # ~30s of audio samples
         _last_drain_warn = 0.0
+        _trim_events = 0   # trims since last log — distinguishes a one-off spike
+        _trim_chunks = 0   # chunks dropped since last log  from a chronic stall
         while not self._stop_event.is_set():
             # Always drain mic queue to prevent buffer buildup, but discard when system audio is active
@@ -358,23 +365,65 @@ class CombinedAudioCapture:
                     )
                     self.audio_queue.put(mic_data)
-            # Watchdog: if output queue is growing too large, consumer is dead — flush it
-            qsize = self.audio_queue.qsize()
-            if qsize > _MAX_QUEUE_SIZE:
-                now = time.time()
-                if now - _last_drain_warn > 30:
-                    import logging
-                    logging.getLogger(__name__).warning(
-                        "Audio queue backlog: %d chunks (~%ds) — flushing to prevent memory leak",
-                        qsize, qsize // self.sample_rate,
-                    )
-                    _last_drain_warn = now
-                # Drain all but the last 1s
-                while self.audio_queue.qsize() > self.sample_rate:
-                    try:
-                        self.audio_queue.get_nowait()
-                    except Exception:
-                        break
+            # Stall-recovery: if the consumer has fallen behind real-time, trim the
+            # oldest queued chunks so transcripts stay live (gappy-but-live beats
+            # complete-but-late). Aggregate over a 30s window so a chronic stall
+            # (many trims) is distinguishable from a one-off spike (a single trim)
+            # and escalates to WARNING instead of hiding behind a lone INFO line.
+            trimmed = self._trim_stale_backlog()
+            if trimmed:
+                _trim_events += 1
+                _trim_chunks += trimmed
+            now = time.time()
+            if _trim_events and now - _last_drain_warn > 30:
+                import logging
+                _log = logging.getLogger(__name__)
+                _msg = ("Audio backlog trimmed %d times (~%.1fs dropped) in last 30s "
+                        "— transcriber feed behind real-time")
+                # Sustained trimming across the window => the consumer is chronically
+                # behind, not a transient spike. Escalate so it's not missed.
+                if _trim_events >= 50:
+                    _log.warning(_msg, _trim_events, _trim_chunks * 0.01)
+                else:
+                    _log.info(_msg, _trim_events, _trim_chunks * 0.01)
+                _last_drain_warn = now
+                _trim_events = 0
+                _trim_chunks = 0
+    def _trim_stale_backlog(self) -> int:
+        """Drop oldest queued chunks when the backlog exceeds ~2s, down to a ~0.5s
+        tail. No-op at or below the trim threshold. Returns the count dropped.
+        Each item is one 10ms chunk; a growing queue means the transcriber feed
+        loop is behind real-time (e.g. a CPU spike), which would otherwise show as
+        transcripts arriving progressively later than live speech.
+        """
+        if self.audio_queue.qsize() <= self._STALL_TRIM_CHUNKS:
+            return 0
+        trimmed = 0
+        while self.audio_queue.qsize() > self._STALL_TAIL_CHUNKS:
+            try:
+                self.audio_queue.get_nowait()
+                trimmed += 1
+            except Empty:
+                break
+        return trimmed
+    def flush(self) -> int:
+        """Drop every queued audio chunk; return how many were dropped.
+        Called the moment the Deepgram WebSocket opens so audio captured during
+        the connect handshake — and during a reconnect's backoff, when the
+        producer keeps running while the socket is down — is not replayed as a
+        burst of stale, lagging transcripts ahead of live speech.
+        """
+        dropped = 0
+        while True:
+            try:
+                self.audio_queue.get_nowait()
+                dropped += 1
+            except Empty:
+                return dropped
     def stop(self) -> None:
         self._stop_event.set()

{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/audio/transcriber.py RENAMED Viewed

@@ -368,6 +368,26 @@ class DeepgramTranscriber:
             self._connection = conn
             logger.info("Deepgram connection established")
+            # Drop audio captured during the connect handshake (and any reconnect
+            # backoff) so transcripts start aligned to live speech instead of
+            # replaying stale backlog. Covers initial connect and reconnect — both
+            # route through here. A capture stand-in without flush() is the only
+            # "expected" miss; anything else is a real fault and must be logged
+            # (a None/broken capture would otherwise crash the feed loop silently).
+            dropped = 0
+            try:
+                dropped = self._capture.flush()
+            except AttributeError:
+                logger.debug("Capture has no flush() — skipping pre-connect flush")
+            except Exception as exc:
+                logger.warning("Pre-connect flush failed: %s", exc)
+            if dropped:
+                logger.info(
+                    "Flushed %d pre-connect audio chunks (~%.1fs) to stay live",
+                    dropped, dropped * 0.01,
+                )
             try:
                 from ai_interview.metrics import metrics
                 metrics.record("deepgram_connect", val=duration_ms, ok=True)

{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview_assistant.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-interview-assistant
-Version: 2.2.2
+Version: 2.2.3
 Summary: Ghost background AI assistant for live code challenges
 Requires-Python: >=3.11
 Requires-Dist: click>=8.0

{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview_assistant.egg-info/SOURCES.txt RENAMED Viewed

@@ -40,4 +40,5 @@ src/ai_interview_assistant.egg-info/dependency_links.txt
 src/ai_interview_assistant.egg-info/entry_points.txt
 src/ai_interview_assistant.egg-info/requires.txt
 src/ai_interview_assistant.egg-info/top_level.txt
-tests/test_llm_clients.py
+tests/test_llm_clients.py
+tests/test_transcription_phase.py

ai_interview_assistant-2.2.3/tests/test_transcription_phase.py ADDED Viewed

@@ -0,0 +1,89 @@
+"""Unit tests for live-transcription phase management (no audio devices).
+Covers docs/perf/SPEC-transcription-phase.md: flush-on-(re)connect and the
+correctly-scaled stall-recovery trim. CombinedAudioCapture.__init__ only creates
+a Queue, so it is constructed directly without opening any device.
+"""
+from __future__ import annotations
+import numpy as np
+import pytest
+from ai_interview.audio.capture import CombinedAudioCapture
+def _chunk():
+    # One 10ms chunk at 16kHz = 160 float32 samples (shape mirrors _mix_loop output).
+    return np.zeros(160, dtype=np.float32)
+def _fill(cap, n):
+    for _ in range(n):
+        cap.audio_queue.put(_chunk())
+@pytest.fixture
+def cap():
+    return CombinedAudioCapture(sample_rate=16000)
+# --------------------------------------------------------------------------
+# flush()
+# --------------------------------------------------------------------------
+def test_flush_empties_queue_and_returns_count(cap):
+    _fill(cap, 37)
+    dropped = cap.flush()
+    assert dropped == 37
+    assert cap.audio_queue.qsize() == 0
+def test_flush_on_empty_queue_returns_zero(cap):
+    assert cap.flush() == 0
+    assert cap.audio_queue.qsize() == 0
+# --------------------------------------------------------------------------
+# _trim_stale_backlog()
+# --------------------------------------------------------------------------
+def test_trim_noop_below_threshold(cap):
+    _fill(cap, cap._STALL_TRIM_CHUNKS - 1)
+    assert cap._trim_stale_backlog() == 0
+    assert cap.audio_queue.qsize() == cap._STALL_TRIM_CHUNKS - 1
+def test_trim_noop_at_exact_threshold(cap):
+    _fill(cap, cap._STALL_TRIM_CHUNKS)
+    assert cap._trim_stale_backlog() == 0, "trim must not fire at exactly the threshold"
+    assert cap.audio_queue.qsize() == cap._STALL_TRIM_CHUNKS
+def test_trim_above_threshold_drops_to_tail(cap):
+    over = cap._STALL_TRIM_CHUNKS + 120
+    _fill(cap, over)
+    trimmed = cap._trim_stale_backlog()
+    assert cap.audio_queue.qsize() == cap._STALL_TAIL_CHUNKS
+    assert trimmed == over - cap._STALL_TAIL_CHUNKS
+def test_trim_keeps_newest_tail(cap):
+    # Mark chunks with an identifiable value so we can prove the OLDEST were dropped.
+    for i in range(cap._STALL_TRIM_CHUNKS + 10):
+        cap.audio_queue.put(np.full(1, i, dtype=np.float32))
+    cap._trim_stale_backlog()
+    remaining_first = cap.audio_queue.get_nowait()[0]
+    # The first surviving chunk must be one of the newer ones, not index 0.
+    assert remaining_first > 0
+# --------------------------------------------------------------------------
+# constants sanity
+# --------------------------------------------------------------------------
+def test_stall_constants_are_sane():
+    c = CombinedAudioCapture(sample_rate=16000)
+    assert c._STALL_TRIM_CHUNKS > c._STALL_TAIL_CHUNKS > 0
+    # 10ms per chunk: ~2s trigger, ~0.5s tail.
+    assert c._STALL_TRIM_CHUNKS * 0.01 == pytest.approx(2.0)
+    assert c._STALL_TAIL_CHUNKS * 0.01 == pytest.approx(0.5)