ai-interview-assistant 2.2.2__tar.gz → 2.2.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/PKG-INFO +1 -1
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/pyproject.toml +1 -1
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/__init__.py +1 -1
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/audio/capture.py +67 -18
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/audio/transcriber.py +20 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview_assistant.egg-info/PKG-INFO +1 -1
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview_assistant.egg-info/SOURCES.txt +2 -1
- ai_interview_assistant-2.2.3/tests/test_transcription_phase.py +89 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/README.md +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/setup.cfg +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/__main__.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/ai_client.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/audio/__init__.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/buffer.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/cli.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/config.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/daemon.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/flet_gui/__init__.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/flet_gui/__main__.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/flet_gui/app.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/flet_gui/screens/__init__.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/flet_gui/screens/dashboard.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/flet_gui/screens/hotkeys.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/flet_gui/screens/scripts.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/flet_gui/screens/settings.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/hotkey_config.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/hotkeys.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/i18n.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/llm_clients.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/menubar.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/metrics.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/ollama_utils.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/overlay.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/screenshot.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/server/__init__.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/server/app.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/server/routes.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/server/websocket.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/state.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/utils.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/watchdog.py +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview_assistant.egg-info/dependency_links.txt +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview_assistant.egg-info/entry_points.txt +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview_assistant.egg-info/requires.txt +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview_assistant.egg-info/top_level.txt +0 -0
- {ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/tests/test_llm_clients.py +0 -0
{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/audio/capture.py
RENAMED
|
@@ -288,6 +288,12 @@ class ScreenCaptureAudio:
|
|
|
288
288
|
class CombinedAudioCapture:
|
|
289
289
|
"""Mixes microphone + system audio and feeds a Queue for the transcriber."""
|
|
290
290
|
|
|
291
|
+
# Each queued item is one 10ms chunk (see _mix_loop). Keep the transcriber in
|
|
292
|
+
# real-time phase: if the consumer (Deepgram feed loop) falls behind, drop the
|
|
293
|
+
# oldest chunks so emitted transcripts stay live instead of drifting late.
|
|
294
|
+
_STALL_TRIM_CHUNKS = 200 # ~2.0s of backlog triggers a trim
|
|
295
|
+
_STALL_TAIL_CHUNKS = 50 # leave ~0.5s of live tail after trimming
|
|
296
|
+
|
|
291
297
|
def __init__(self, sample_rate: int = SAMPLE_RATE) -> None:
|
|
292
298
|
self.sample_rate = sample_rate
|
|
293
299
|
self.audio_queue: Queue[np.ndarray] = Queue()
|
|
@@ -327,8 +333,9 @@ class CombinedAudioCapture:
|
|
|
327
333
|
chunk_size = int(self.sample_rate * 0.01) # 10ms — forward audio to Deepgram faster
|
|
328
334
|
mic_buf: deque = deque(maxlen=self.sample_rate)
|
|
329
335
|
sys_buf: deque = deque(maxlen=self.sample_rate)
|
|
330
|
-
_MAX_QUEUE_SIZE = self.sample_rate * 30 # ~30s of audio samples
|
|
331
336
|
_last_drain_warn = 0.0
|
|
337
|
+
_trim_events = 0 # trims since last log — distinguishes a one-off spike
|
|
338
|
+
_trim_chunks = 0 # chunks dropped since last log from a chronic stall
|
|
332
339
|
|
|
333
340
|
while not self._stop_event.is_set():
|
|
334
341
|
# Always drain mic queue to prevent buffer buildup, but discard when system audio is active
|
|
@@ -358,23 +365,65 @@ class CombinedAudioCapture:
|
|
|
358
365
|
)
|
|
359
366
|
self.audio_queue.put(mic_data)
|
|
360
367
|
|
|
361
|
-
#
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
368
|
+
# Stall-recovery: if the consumer has fallen behind real-time, trim the
|
|
369
|
+
# oldest queued chunks so transcripts stay live (gappy-but-live beats
|
|
370
|
+
# complete-but-late). Aggregate over a 30s window so a chronic stall
|
|
371
|
+
# (many trims) is distinguishable from a one-off spike (a single trim)
|
|
372
|
+
# and escalates to WARNING instead of hiding behind a lone INFO line.
|
|
373
|
+
trimmed = self._trim_stale_backlog()
|
|
374
|
+
if trimmed:
|
|
375
|
+
_trim_events += 1
|
|
376
|
+
_trim_chunks += trimmed
|
|
377
|
+
now = time.time()
|
|
378
|
+
if _trim_events and now - _last_drain_warn > 30:
|
|
379
|
+
import logging
|
|
380
|
+
_log = logging.getLogger(__name__)
|
|
381
|
+
_msg = ("Audio backlog trimmed %d times (~%.1fs dropped) in last 30s "
|
|
382
|
+
"— transcriber feed behind real-time")
|
|
383
|
+
# Sustained trimming across the window => the consumer is chronically
|
|
384
|
+
# behind, not a transient spike. Escalate so it's not missed.
|
|
385
|
+
if _trim_events >= 50:
|
|
386
|
+
_log.warning(_msg, _trim_events, _trim_chunks * 0.01)
|
|
387
|
+
else:
|
|
388
|
+
_log.info(_msg, _trim_events, _trim_chunks * 0.01)
|
|
389
|
+
_last_drain_warn = now
|
|
390
|
+
_trim_events = 0
|
|
391
|
+
_trim_chunks = 0
|
|
392
|
+
|
|
393
|
+
def _trim_stale_backlog(self) -> int:
|
|
394
|
+
"""Drop oldest queued chunks when the backlog exceeds ~2s, down to a ~0.5s
|
|
395
|
+
tail. No-op at or below the trim threshold. Returns the count dropped.
|
|
396
|
+
|
|
397
|
+
Each item is one 10ms chunk; a growing queue means the transcriber feed
|
|
398
|
+
loop is behind real-time (e.g. a CPU spike), which would otherwise show as
|
|
399
|
+
transcripts arriving progressively later than live speech.
|
|
400
|
+
"""
|
|
401
|
+
if self.audio_queue.qsize() <= self._STALL_TRIM_CHUNKS:
|
|
402
|
+
return 0
|
|
403
|
+
trimmed = 0
|
|
404
|
+
while self.audio_queue.qsize() > self._STALL_TAIL_CHUNKS:
|
|
405
|
+
try:
|
|
406
|
+
self.audio_queue.get_nowait()
|
|
407
|
+
trimmed += 1
|
|
408
|
+
except Empty:
|
|
409
|
+
break
|
|
410
|
+
return trimmed
|
|
411
|
+
|
|
412
|
+
def flush(self) -> int:
|
|
413
|
+
"""Drop every queued audio chunk; return how many were dropped.
|
|
414
|
+
|
|
415
|
+
Called the moment the Deepgram WebSocket opens so audio captured during
|
|
416
|
+
the connect handshake — and during a reconnect's backoff, when the
|
|
417
|
+
producer keeps running while the socket is down — is not replayed as a
|
|
418
|
+
burst of stale, lagging transcripts ahead of live speech.
|
|
419
|
+
"""
|
|
420
|
+
dropped = 0
|
|
421
|
+
while True:
|
|
422
|
+
try:
|
|
423
|
+
self.audio_queue.get_nowait()
|
|
424
|
+
dropped += 1
|
|
425
|
+
except Empty:
|
|
426
|
+
return dropped
|
|
378
427
|
|
|
379
428
|
def stop(self) -> None:
|
|
380
429
|
self._stop_event.set()
|
{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/audio/transcriber.py
RENAMED
|
@@ -368,6 +368,26 @@ class DeepgramTranscriber:
|
|
|
368
368
|
|
|
369
369
|
self._connection = conn
|
|
370
370
|
logger.info("Deepgram connection established")
|
|
371
|
+
|
|
372
|
+
# Drop audio captured during the connect handshake (and any reconnect
|
|
373
|
+
# backoff) so transcripts start aligned to live speech instead of
|
|
374
|
+
# replaying stale backlog. Covers initial connect and reconnect — both
|
|
375
|
+
# route through here. A capture stand-in without flush() is the only
|
|
376
|
+
# "expected" miss; anything else is a real fault and must be logged
|
|
377
|
+
# (a None/broken capture would otherwise crash the feed loop silently).
|
|
378
|
+
dropped = 0
|
|
379
|
+
try:
|
|
380
|
+
dropped = self._capture.flush()
|
|
381
|
+
except AttributeError:
|
|
382
|
+
logger.debug("Capture has no flush() — skipping pre-connect flush")
|
|
383
|
+
except Exception as exc:
|
|
384
|
+
logger.warning("Pre-connect flush failed: %s", exc)
|
|
385
|
+
if dropped:
|
|
386
|
+
logger.info(
|
|
387
|
+
"Flushed %d pre-connect audio chunks (~%.1fs) to stay live",
|
|
388
|
+
dropped, dropped * 0.01,
|
|
389
|
+
)
|
|
390
|
+
|
|
371
391
|
try:
|
|
372
392
|
from ai_interview.metrics import metrics
|
|
373
393
|
metrics.record("deepgram_connect", val=duration_ms, ok=True)
|
|
@@ -40,4 +40,5 @@ src/ai_interview_assistant.egg-info/dependency_links.txt
|
|
|
40
40
|
src/ai_interview_assistant.egg-info/entry_points.txt
|
|
41
41
|
src/ai_interview_assistant.egg-info/requires.txt
|
|
42
42
|
src/ai_interview_assistant.egg-info/top_level.txt
|
|
43
|
-
tests/test_llm_clients.py
|
|
43
|
+
tests/test_llm_clients.py
|
|
44
|
+
tests/test_transcription_phase.py
|
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
"""Unit tests for live-transcription phase management (no audio devices).
|
|
2
|
+
|
|
3
|
+
Covers docs/perf/SPEC-transcription-phase.md: flush-on-(re)connect and the
|
|
4
|
+
correctly-scaled stall-recovery trim. CombinedAudioCapture.__init__ only creates
|
|
5
|
+
a Queue, so it is constructed directly without opening any device.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import numpy as np
|
|
10
|
+
import pytest
|
|
11
|
+
|
|
12
|
+
from ai_interview.audio.capture import CombinedAudioCapture
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def _chunk():
|
|
16
|
+
# One 10ms chunk at 16kHz = 160 float32 samples (shape mirrors _mix_loop output).
|
|
17
|
+
return np.zeros(160, dtype=np.float32)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def _fill(cap, n):
|
|
21
|
+
for _ in range(n):
|
|
22
|
+
cap.audio_queue.put(_chunk())
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@pytest.fixture
|
|
26
|
+
def cap():
|
|
27
|
+
return CombinedAudioCapture(sample_rate=16000)
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
# --------------------------------------------------------------------------
|
|
31
|
+
# flush()
|
|
32
|
+
# --------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
def test_flush_empties_queue_and_returns_count(cap):
|
|
35
|
+
_fill(cap, 37)
|
|
36
|
+
dropped = cap.flush()
|
|
37
|
+
assert dropped == 37
|
|
38
|
+
assert cap.audio_queue.qsize() == 0
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def test_flush_on_empty_queue_returns_zero(cap):
|
|
42
|
+
assert cap.flush() == 0
|
|
43
|
+
assert cap.audio_queue.qsize() == 0
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
# --------------------------------------------------------------------------
|
|
47
|
+
# _trim_stale_backlog()
|
|
48
|
+
# --------------------------------------------------------------------------
|
|
49
|
+
|
|
50
|
+
def test_trim_noop_below_threshold(cap):
|
|
51
|
+
_fill(cap, cap._STALL_TRIM_CHUNKS - 1)
|
|
52
|
+
assert cap._trim_stale_backlog() == 0
|
|
53
|
+
assert cap.audio_queue.qsize() == cap._STALL_TRIM_CHUNKS - 1
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def test_trim_noop_at_exact_threshold(cap):
|
|
57
|
+
_fill(cap, cap._STALL_TRIM_CHUNKS)
|
|
58
|
+
assert cap._trim_stale_backlog() == 0, "trim must not fire at exactly the threshold"
|
|
59
|
+
assert cap.audio_queue.qsize() == cap._STALL_TRIM_CHUNKS
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_trim_above_threshold_drops_to_tail(cap):
|
|
63
|
+
over = cap._STALL_TRIM_CHUNKS + 120
|
|
64
|
+
_fill(cap, over)
|
|
65
|
+
trimmed = cap._trim_stale_backlog()
|
|
66
|
+
assert cap.audio_queue.qsize() == cap._STALL_TAIL_CHUNKS
|
|
67
|
+
assert trimmed == over - cap._STALL_TAIL_CHUNKS
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def test_trim_keeps_newest_tail(cap):
|
|
71
|
+
# Mark chunks with an identifiable value so we can prove the OLDEST were dropped.
|
|
72
|
+
for i in range(cap._STALL_TRIM_CHUNKS + 10):
|
|
73
|
+
cap.audio_queue.put(np.full(1, i, dtype=np.float32))
|
|
74
|
+
cap._trim_stale_backlog()
|
|
75
|
+
remaining_first = cap.audio_queue.get_nowait()[0]
|
|
76
|
+
# The first surviving chunk must be one of the newer ones, not index 0.
|
|
77
|
+
assert remaining_first > 0
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
# --------------------------------------------------------------------------
|
|
81
|
+
# constants sanity
|
|
82
|
+
# --------------------------------------------------------------------------
|
|
83
|
+
|
|
84
|
+
def test_stall_constants_are_sane():
|
|
85
|
+
c = CombinedAudioCapture(sample_rate=16000)
|
|
86
|
+
assert c._STALL_TRIM_CHUNKS > c._STALL_TAIL_CHUNKS > 0
|
|
87
|
+
# 10ms per chunk: ~2s trigger, ~0.5s tail.
|
|
88
|
+
assert c._STALL_TRIM_CHUNKS * 0.01 == pytest.approx(2.0)
|
|
89
|
+
assert c._STALL_TAIL_CHUNKS * 0.01 == pytest.approx(0.5)
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/audio/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/flet_gui/__init__.py
RENAMED
|
File without changes
|
{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/flet_gui/__main__.py
RENAMED
|
File without changes
|
{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/flet_gui/app.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/hotkey_config.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/llm_clients.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/ollama_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/screenshot.py
RENAMED
|
File without changes
|
{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/server/__init__.py
RENAMED
|
File without changes
|
{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/server/app.py
RENAMED
|
File without changes
|
{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/server/routes.py
RENAMED
|
File without changes
|
{ai_interview_assistant-2.2.2 → ai_interview_assistant-2.2.3}/src/ai_interview/server/websocket.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|