PyPI - ai-interview-assistant - Versions diffs - 2.2.0__tar.gz → 2.2.2__tar.gz - Mend

ai-interview-assistant 2.2.0 (tar.gz) → 2.2.2 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45) hide show

{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-interview-assistant
-Version: 2.2.0
+Version: 2.2.2
 Summary: Ghost background AI assistant for live code challenges
 Requires-Python: >=3.11
 Requires-Dist: click>=8.0
@@ -27,5 +27,9 @@ Requires-Dist: simple-term-menu>=1.6.0
 Requires-Dist: datadog>=0.49.0
 Requires-Dist: psutil>=5.9
 Requires-Dist: httpx>=0.27.0
+Requires-Dist: h2>=4.0
 Requires-Dist: flet>=0.25.0
 Requires-Dist: PyPDF2>=3.0
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.23; extra == "dev"

{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "ai-interview-assistant"
-version = "2.2.0"
+version = "2.2.2"
 description = "Ghost background AI assistant for live code challenges"
 requires-python = ">=3.11"
 dependencies = [
@@ -32,10 +32,17 @@ dependencies = [
     "datadog>=0.49.0",
     "psutil>=5.9",
     "httpx>=0.27.0",
+    "h2>=4.0",
     "flet>=0.25.0",
     "PyPDF2>=3.0",
 ]
+[project.optional-dependencies]
+dev = [
+    "pytest>=8.0",
+    "pytest-asyncio>=0.23",
+]
 [project.scripts]
 ai-interview = "ai_interview.cli:cli"

{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """AI Interview Assistant — ghost background tool for live code challenges."""
-__version__ = "2.2.0"
+__version__ = "2.2.2"

{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/ai_client.py RENAMED Viewed

@@ -139,16 +139,13 @@ async def _stream_claude(state: "AppState", config: "Config", messages: list, cu
     """Stream response from Claude API with retry on transient errors."""
     from ai_interview.server.websocket import broadcast
     from ai_interview.metrics import metrics
+    from ai_interview import llm_clients
     import anthropic
-    is_oauth = config.anthropic_api_key.startswith("sk-ant-oat")
-    if is_oauth:
-        client = anthropic.AsyncAnthropic(
-            api_key="placeholder",
-            default_headers={"Authorization": f"Bearer {config.anthropic_api_key}"},
-        )
-    else:
-        client = anthropic.AsyncAnthropic(api_key=config.anthropic_api_key)
+    # Shared, connection-pooled HTTP/2 client, reused across queries so query 2+
+    # skip the DNS+TCP+TLS handshake. OAuth (sk-ant-oat) handling lives in the
+    # builder. Do NOT close it here — it is shared and closed at daemon shutdown.
+    client = llm_clients.get_anthropic_client(config.anthropic_api_key)
     full_text = ""
     _t0 = time.monotonic()
@@ -262,10 +259,13 @@ async def _stream_gemini(state: "AppState", config: "Config", messages: list, cu
     _system_prompt = system_prompt or config.system_prompt
     try:
-        from google import genai
         from google.genai import types
+        from ai_interview import llm_clients
-        client = genai.Client(api_key=config.google_api_key)
+        # Shared client — reused across queries (connection reuse; google-genai
+        # accepts no custom httpx client, so no HTTP/2). Serialized query model +
+        # idle-gated keepalive mean it is never used concurrently.
+        client = llm_clients.get_gemini_client(config.google_api_key)
         contents = _messages_to_gemini_contents(messages, _system_prompt)
         # Run synchronous streaming in a thread to avoid blocking asyncio

{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/config.py RENAMED Viewed

@@ -188,6 +188,25 @@ def _read_cv_text(cv_path: str) -> str:
         return ""
+def _assemble_with_context(base_prompt: str, codebase: str, cv_text: str) -> str:
+    """Append codebase and CV context blocks to a user-supplied base prompt."""
+    prompt = base_prompt
+    if codebase:
+        prompt += (
+            "\n\n---\nThe interview is about the following codebase. "
+            "Use it to answer questions about the code directly.\n\n"
+            + codebase
+        )
+    if cv_text:
+        prompt += (
+            "\n\nCandidate's CV/Resume:\n"
+            f"{cv_text}\n\n"
+            "When answering behavioural or experience questions, reference real projects "
+            "and roles from this CV. Keep answers first-person as if the candidate is speaking.\n"
+        )
+    return prompt
 def _build_system_prompt(
     language: str = "",
     challenge_type: str = "",
@@ -306,6 +325,7 @@ class Config:
     whisper_host: str = ""                  # Remote faster-whisper-server URL (e.g. http://192.168.1.50:8000)
     show_menubar_icon: bool = True
     scripts: dict = field(default_factory=dict)
+    user_system_prompt: str = ""   # user-edited base prompt from dashboard preview
     system_prompt: str = field(default="")
     @property
@@ -340,11 +360,17 @@ class Config:
         if not self.system_prompt:
             codebase = read_codebase_context(self.context_path) if self.context_path else ""
             cv_text = _read_cv_text(self.cv_path)
-            self.system_prompt = _build_system_prompt(
-                self.interview_language, self.challenge_type, codebase,
-                self.role_context, self.transcription_language, cv_text,
-                scripts=self.scripts,
-            )
+            if self.user_system_prompt:
+                # User customised the base prompt — use it and still inject codebase/CV
+                self.system_prompt = _assemble_with_context(
+                    self.user_system_prompt, codebase, cv_text
+                )
+            else:
+                self.system_prompt = _build_system_prompt(
+                    self.interview_language, self.challenge_type, codebase,
+                    self.role_context, self.transcription_language, cv_text,
+                    scripts=self.scripts,
+                )
     @classmethod
     def from_saved(cls, overrides: Optional[dict] = None) -> "Config":
@@ -392,4 +418,5 @@ class Config:
             whisper_host=merged.get("whisper_host", ""),
             show_menubar_icon=merged.get("show_menubar_icon", True),
             scripts=merged.get("scripts", {}),
+            user_system_prompt=merged.get("user_system_prompt", ""),
         )

{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/daemon.py RENAMED Viewed

@@ -279,10 +279,19 @@ def run_daemon(config: Config) -> None:
         await site.start()
         logger.info("Viewer server on 0.0.0.0:%d", config.port)
+        # Pre-warm pooled LLM connections (HTTP/2 + keep-alive) so the very first
+        # query skips the DNS+TCP+TLS handshake. Best-effort — never blocks start.
+        from ai_interview import llm_clients
+        try:
+            await llm_clients.prewarm(config)
+        except Exception as exc:
+            logger.info("LLM prewarm failed (best-effort): %s", exc)
         _MAX_IDLE_S = 30 * 60  # 30 min idle → auto-shutdown
         state.last_activity_at = time.time()  # initialise so first check is fair
         _last_heartbeat_uptime = -1
         _last_cpu_broadcast = 0.0
+        _last_llm_keepalive = time.time()  # prewarm just pinged; next ping in ~30s
         # Prime psutil baseline — first call always returns 0.0, real values start from second call
         try:
@@ -323,11 +332,28 @@ def run_daemon(config: Config) -> None:
                 except Exception:
                     pass
+            # Keep pooled LLM sockets warm (~30s cadence) so a query after a long
+            # idle gap doesn't pay a fresh handshake. Only ping while idle: an
+            # in-flight query already keeps the connection hot, and skipping then
+            # avoids touching the shared client concurrently. Fire-and-forget so
+            # the 0.5s health loop never blocks on the network.
+            if now - _last_llm_keepalive >= 30:
+                _last_llm_keepalive = now
+                _aq = state.active_query
+                if _aq is None or _aq.done():
+                    asyncio.ensure_future(llm_clients.keepalive_ping())
             # Auto-shutdown after 30 min of no hotkeys / queries / transcripts
             if idle >= _MAX_IDLE_S:
                 logger.info("30-minute idle limit reached — shutting down automatically")
                 shutdown_flag["stop"] = True
+        # Close pooled LLM clients in the loop they were created in (best-effort).
+        try:
+            await llm_clients.close_all()
+        except Exception as exc:
+            logger.info("LLM client close failed (best-effort): %s", exc)
         await runner.cleanup()
     try:

{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/screens/dashboard.py RENAMED Viewed

@@ -151,15 +151,32 @@ class DashboardScreen(ft.Column):
         # ---- Prompt preview ----
         self._prompt_preview_expanded = False
-        self._prompt_preview = ft.Text(
-            "",
-            size=11,
-            italic=True,
-            color=MUTED,
-            selectable=False,
+        self._preview_manually_edited = False
+        self._prompt_preview = ft.TextField(
+            value="",
+            multiline=True,
+            min_lines=8,
+            max_lines=20,
+            text_size=11,
+            border_radius=8,
+            border_color="#2a2a2a",
+            bgcolor="#111111",
+            content_padding=ft.padding.all(12),
+            text_style=ft.TextStyle(italic=True, color=MUTED),
             visible=False,
+            expand=True,
+            on_change=lambda _: setattr(self, "_preview_manually_edited", True),
         )
+        self._preview_notes = ft.Text("", size=10, color=MUTED, italic=True, visible=False)
         self._preview_chevron = ft.Text("▶", size=11, color=MUTED)
+        self._regenerate_btn = ft.IconButton(
+            icon=ft.Icons.REFRESH,
+            icon_size=14,
+            icon_color=MUTED,
+            tooltip=t("regenerate_preview"),
+            on_click=self._regenerate_preview,
+            visible=False,
+        )
         self._update_prompt_preview()
         self._lang_field.visible = challenge in ("code_challenge", "system_design")
@@ -218,19 +235,23 @@ class DashboardScreen(ft.Column):
             transcription_card,
             self._cv_check,
             ft.Column([
-                ft.TextButton(
-                    content=ft.Row(
-                        [
-                            ft.Text(t("prompt_preview_label"), size=11, color=MUTED),
-                            self._preview_chevron,
-                        ],
-                        spacing=4,
-                        tight=True,
+                ft.Row([
+                    ft.TextButton(
+                        content=ft.Row(
+                            [
+                                ft.Text(t("prompt_preview_label"), size=11, color=MUTED),
+                                self._preview_chevron,
+                            ],
+                            spacing=4,
+                            tight=True,
+                        ),
+                        on_click=self._toggle_prompt_preview,
+                        style=ft.ButtonStyle(padding=ft.padding.all(0), overlay_color=ft.Colors.TRANSPARENT),
                     ),
-                    on_click=self._toggle_prompt_preview,
-                    style=ft.ButtonStyle(padding=ft.padding.all(0), overlay_color=ft.Colors.TRANSPARENT),
-                ),
+                    self._regenerate_btn,
+                ], spacing=0, vertical_alignment=ft.CrossAxisAlignment.CENTER),
                 self._prompt_preview,
+                self._preview_notes,
             ], spacing=6),
             ft.Row([self._start_btn, self._start_status], spacing=12),
         ], spacing=12)
@@ -333,6 +354,15 @@ class DashboardScreen(ft.Column):
         self._cv_check.value = bool(cv_name)
         self._cv_check.disabled = not bool(cv_name)
+        # Restore saved prompt if present, otherwise regenerate
+        saved_prompt = fresh.get("user_system_prompt", "")
+        if saved_prompt:
+            self._prompt_preview.value = saved_prompt
+            self._preview_manually_edited = True
+        else:
+            self._preview_manually_edited = False
+            self._update_prompt_preview()
         self._update_model_options()
         try:
             self._page.update()
@@ -408,11 +438,24 @@ class DashboardScreen(ft.Column):
     def _toggle_prompt_preview(self, _e=None):
         self._prompt_preview_expanded = not self._prompt_preview_expanded
-        self._prompt_preview.visible = self._prompt_preview_expanded
-        self._preview_chevron.value = "▼" if self._prompt_preview_expanded else "▶"
+        expanded = self._prompt_preview_expanded
+        self._prompt_preview.visible = expanded
+        self._preview_notes.visible = expanded and bool(self._preview_notes.value)
+        self._regenerate_btn.visible = expanded
+        self._preview_chevron.value = "▼" if expanded else "▶"
         try:
             self._preview_chevron.update()
             self._prompt_preview.update()
+            self._preview_notes.update()
+            self._regenerate_btn.update()
+        except Exception:
+            pass
+    def _regenerate_preview(self, _e=None):
+        self._preview_manually_edited = False
+        self._update_prompt_preview()
+        try:
+            self._page.update()
         except Exception:
             pass
@@ -420,33 +463,41 @@ class DashboardScreen(ft.Column):
         """Rebuild the system prompt preview from current field values."""
         if not hasattr(self, "_cv_check") or not hasattr(self, "_prompt_preview"):
             return
-        challenge = self._challenge_dropdown.value or "code_challenge"
-        language = self._lang_field.value or ""
-        brief = self._brief_field.value or ""
+        # Rebuild notes regardless of manual edit state
         context_path = (self._context_path.value or "").strip()
         cv_selected = self._cv_check.value
+        notes = []
+        if context_path:
+            notes.append(f"+ codebase context will be injected from: {context_path}")
+        if cv_selected and self._config.get("cv_path"):
+            notes.append(f"+ CV will be injected: {os.path.basename(self._config['cv_path'])}")
+        self._preview_notes.value = "\n".join(notes)
+        self._preview_notes.visible = self._prompt_preview_expanded and bool(notes)
+        if self._preview_manually_edited:
+            # User has edited the prompt — don't overwrite, just refresh notes
+            if self._prompt_preview_expanded:
+                try:
+                    self._preview_notes.update()
+                except Exception:
+                    pass
+            return
         scripts = {**DEFAULT_SCRIPTS, **self._config.get("scripts", {})}
         prompt = _build_system_prompt(
-            language=language,
-            challenge_type=challenge,
+            language=self._lang_field.value or "",
+            challenge_type=self._challenge_dropdown.value or "code_challenge",
             codebase_context="",
-            role_context=brief,
+            role_context=self._brief_field.value or "",
             scripts=scripts,
         )
-        notes = []
-        if context_path:
-            notes.append(f"+ codebase context from: {context_path}")
-        if cv_selected and self._config.get("cv_path"):
-            notes.append(f"+ CV: {os.path.basename(self._config['cv_path'])}")
-        if notes:
-            prompt += "\n\n" + "\n".join(notes)
         self._prompt_preview.value = prompt
         if self._prompt_preview_expanded:
             try:
                 self._prompt_preview.update()
+                self._preview_notes.update()
             except Exception:
                 pass
@@ -540,6 +591,7 @@ class DashboardScreen(ft.Column):
                 config["challenge_type"] = self._challenge_dropdown.value or "code_challenge"
                 config["transcription_language"] = self._trans_lang.value or "auto"
                 config["context_path"] = (self._context_path.value or "").strip()
+                config["user_system_prompt"] = (self._prompt_preview.value or "").strip()
                 # Per-session CV toggle — don't permanently clear the path
                 if not self._cv_check.value:

{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/i18n.py RENAMED Viewed

@@ -155,7 +155,8 @@ _STRINGS = {
         "type_press_hold": "Tap + Hold",
         # Dashboard prompt preview
-        "prompt_preview_label": "What will be sent to the AI (preview):",
+        "prompt_preview_label": "What will be sent to the AI:",
+        "regenerate_preview": "Regenerate from current selections",
         # AI Scripts
         "save": "Save",

ai_interview_assistant-2.2.2/src/ai_interview/llm_clients.py ADDED Viewed

@@ -0,0 +1,225 @@
+"""Shared, long-lived LLM SDK clients (connection reuse + HTTP/2 + heartbeat).
+Every AI query used to build a fresh SDK client (``ai_client._stream_claude`` /
+``_stream_gemini``), paying a new DNS + TCP + TLS handshake on its first request.
+On a high-latency link that handshake dominates time-to-first-token. Deepgram
+never has this problem because it holds one persistent websocket for the whole
+session.
+This module mirrors that idea: one connection-pooled client per provider, reused
+across every query in a daemon session. Query 1 pays the handshake; query 2+ skip
+it. The Anthropic transport additionally uses HTTP/2 (multiplexing + the
+connection stays open), and a periodic keep-alive heartbeat keeps the pooled
+socket warm so it never idles out mid-session.
+Design notes / invariants:
+- TLS verification stays ON (this repo verifies via certifi; do NOT add
+  ``verify=False`` here).
+- No top-level imports of ``ai_interview.*`` or of audio libraries — keep this
+  importable both before and after the daemon fork. SDKs are imported lazily.
+- The daemon runs a single asyncio loop and serializes queries (a new query
+  cancels the in-flight one), so a shared client is never used by two queries at
+  once. The caller gates ``keepalive_ping`` on "no active query".
+"""
+from __future__ import annotations
+import inspect
+import logging
+logger = logging.getLogger(__name__)
+# provider -> (api_key, client_object)
+_CLIENTS: dict[str, tuple[str, object]] = {}
+# Strong refs to in-flight rotation-close tasks, so a fire-and-forget close is
+# not GC'd before it runs (and so close_all can drain them at shutdown).
+_PENDING_CLOSES: set = set()
+# Latched so the HTTP/2 status (on, or downgraded to 1.1) is logged exactly once.
+_h2_unavailable_logged = False
+_h2_enabled_logged = False
+def _httpx_client():
+    """A connection-pooled async httpx client tuned to stay warm across the gaps
+    between queries. HTTP/2 multiplexes and keeps the connection open;
+    ``keepalive_expiry`` outlives a typical idle gap.
+    HTTP/2 needs the optional ``h2`` package. If it is not installed, httpx raises
+    ``ImportError`` when ``http2=True``; we degrade to HTTP/1.1 (still pooled +
+    keep-alive) instead of breaking queries.
+    """
+    global _h2_unavailable_logged, _h2_enabled_logged
+    import httpx
+    limits = httpx.Limits(max_keepalive_connections=10, keepalive_expiry=300.0)
+    try:
+        client = httpx.AsyncClient(http2=True, limits=limits)
+        if not _h2_enabled_logged:
+            _h2_enabled_logged = True
+            logger.info("LLM transport: HTTP/2 enabled with connection reuse")
+        return client
+    except ImportError:
+        if not _h2_unavailable_logged:
+            _h2_unavailable_logged = True
+            logger.info(
+                "HTTP/2 unavailable (install 'h2' / httpx[http2]) — "
+                "using HTTP/1.1 with connection reuse"
+            )
+        return httpx.AsyncClient(http2=False, limits=limits)
+def _build_anthropic(api_key: str):
+    """Build a pooled AsyncAnthropic, honoring the OAuth Bearer-token convention.
+    Mirrors ``ai_client._stream_claude``: an ``sk-ant-oat`` key is an OAuth token
+    sent via the Authorization header, not the api_key field.
+    """
+    import anthropic
+    if api_key.startswith("sk-ant-oat"):
+        return anthropic.AsyncAnthropic(
+            api_key="placeholder",
+            default_headers={"Authorization": f"Bearer {api_key}"},
+            http_client=_httpx_client(),
+        )
+    return anthropic.AsyncAnthropic(api_key=api_key, http_client=_httpx_client())
+def _build_gemini(api_key: str):
+    """Build a reused genai.Client.
+    google-genai does not accept a custom httpx client, so Gemini gets connection
+    reuse only (no HTTP/2). Reuse alone still skips the per-query handshake.
+    This is a SYNC client. It is only ever touched from one place at a time: the
+    daemon serializes queries, and ``keepalive_ping`` is gated by the caller on
+    "no active query", so the client is never used concurrently.
+    """
+    from google import genai
+    return genai.Client(api_key=api_key)
+def _get(provider: str, api_key: str, builder):
+    cached = _CLIENTS.get(provider)
+    if cached is not None and cached[0] == api_key:
+        return cached[1]
+    if cached is not None:
+        # Key rotated — drop the stale client and close it without blocking.
+        _schedule_close(cached[1])
+    client = builder(api_key)
+    _CLIENTS[provider] = (api_key, client)
+    return client
+def get_anthropic_client(api_key: str):
+    return _get("anthropic", api_key, _build_anthropic)
+def get_gemini_client(api_key: str):
+    return _get("google", api_key, _build_gemini)
+async def _aclose(client) -> None:
+    """Best-effort close of an SDK client or raw httpx client. Tries async
+    ``aclose`` first, then ``close`` (awaiting it if it returns a coroutine)."""
+    try:
+        aclose = getattr(client, "aclose", None)
+        if aclose is not None:
+            await aclose()
+            return
+        close = getattr(client, "close", None)
+        if close is not None:
+            res = close()
+            if inspect.isawaitable(res):
+                await res
+    except Exception as exc:
+        logger.info("llm client close failed (best-effort): %s", exc)
+def _schedule_close(client) -> None:
+    """Close a rotated-out client without blocking the caller. If an event loop is
+    running, schedule the async close; otherwise close synchronously."""
+    import asyncio
+    try:
+        loop = asyncio.get_running_loop()
+    except RuntimeError:
+        loop = None
+    if loop is not None:
+        task = loop.create_task(_aclose(client))
+        _PENDING_CLOSES.add(task)
+        task.add_done_callback(_PENDING_CLOSES.discard)
+    else:
+        # No running loop in this thread — close synchronously. Best-effort: a
+        # rotated-out client teardown must never break the caller's query path.
+        try:
+            asyncio.run(_aclose(client))
+        except Exception as exc:
+            logger.info("llm client sync close failed (best-effort): %s", exc)
+async def close_all() -> None:
+    """Close every cached client. Called at daemon shutdown. Best-effort."""
+    clients = [c for _, (_, c) in _CLIENTS.items()]
+    _CLIENTS.clear()
+    for client in clients:
+        await _aclose(client)
+    # Drain any in-flight rotation-close tasks so the loop does not tear down with
+    # a pending close (avoids the 3.12 "Task was destroyed" warning).
+    if _PENDING_CLOSES:
+        import asyncio
+        await asyncio.gather(*list(_PENDING_CLOSES), return_exceptions=True)
+# ---------------------------------------------------------------------------
+# Keep-alive heartbeat
+#
+# httpx keeps an idle pooled socket only for `keepalive_expiry` (300s), and a
+# provider/proxy/VPN often closes an idle keep-alive connection much sooner
+# (~60-120s). Without traffic the socket goes cold mid-session and the next query
+# pays a fresh handshake. Mirroring Deepgram's websocket heartbeat, while the
+# daemon runs we send a cheap, no-token request (a model list) on each pooled
+# client every ~30s so the socket never idles out. The daemon main loop drives the
+# cadence and gates on "no active query".
+# ---------------------------------------------------------------------------
+async def _ping_client(provider: str, client) -> None:
+    """One cheap, no-token request that keeps the pooled socket warm. Uses the
+    provider's model-list endpoint (a GET, no completion, no tokens billed)."""
+    if provider == "anthropic":
+        await client.models.list()
+    elif provider == "google":
+        import asyncio
+        # google-genai's Client is synchronous; touch it off the event loop.
+        await asyncio.to_thread(lambda: next(iter(client.models.list()), None))
+async def keepalive_ping() -> None:
+    """Touch every cached client's connection so the pooled TCP+TLS socket stays
+    warm. Best-effort: a failed ping (transient network, or a 401 on an OAuth
+    models endpoint) is logged at INFO and never raised — the connection is still
+    pooled, and a real query re-establishes it if needed."""
+    for provider, (_api_key, client) in list(_CLIENTS.items()):
+        try:
+            await _ping_client(provider, client)
+        except Exception as exc:
+            logger.info("llm keepalive ping failed for %s (best-effort): %s", provider, exc)
+async def prewarm(config) -> None:
+    """Build and warm the pooled client for every provider that has a configured
+    key, so the FIRST query of a session skips the handshake too. Called once at
+    daemon startup. Best-effort per provider (a missing SDK or bad key never
+    breaks the others)."""
+    for getter, key in (
+        (get_anthropic_client, getattr(config, "anthropic_api_key", "") or ""),
+        (get_gemini_client, getattr(config, "google_api_key", "") or ""),
+    ):
+        if key:
+            try:
+                getter(key)
+            except Exception as exc:
+                logger.info("llm prewarm build failed (best-effort): %s", exc)
+    await keepalive_ping()

{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview_assistant.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-interview-assistant
-Version: 2.2.0
+Version: 2.2.2
 Summary: Ghost background AI assistant for live code challenges
 Requires-Python: >=3.11
 Requires-Dist: click>=8.0
@@ -27,5 +27,9 @@ Requires-Dist: simple-term-menu>=1.6.0
 Requires-Dist: datadog>=0.49.0
 Requires-Dist: psutil>=5.9
 Requires-Dist: httpx>=0.27.0
+Requires-Dist: h2>=4.0
 Requires-Dist: flet>=0.25.0
 Requires-Dist: PyPDF2>=3.0
+Provides-Extra: dev
+Requires-Dist: pytest>=8.0; extra == "dev"
+Requires-Dist: pytest-asyncio>=0.23; extra == "dev"

{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview_assistant.egg-info/SOURCES.txt RENAMED Viewed

@@ -10,6 +10,7 @@ src/ai_interview/daemon.py
 src/ai_interview/hotkey_config.py
 src/ai_interview/hotkeys.py
 src/ai_interview/i18n.py
+src/ai_interview/llm_clients.py
 src/ai_interview/menubar.py
 src/ai_interview/metrics.py
 src/ai_interview/ollama_utils.py
@@ -38,4 +39,5 @@ src/ai_interview_assistant.egg-info/SOURCES.txt
 src/ai_interview_assistant.egg-info/dependency_links.txt
 src/ai_interview_assistant.egg-info/entry_points.txt
 src/ai_interview_assistant.egg-info/requires.txt
-src/ai_interview_assistant.egg-info/top_level.txt
+src/ai_interview_assistant.egg-info/top_level.txt
+tests/test_llm_clients.py

{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview_assistant.egg-info/requires.txt RENAMED Viewed

@@ -16,6 +16,7 @@ simple-term-menu>=1.6.0
 datadog>=0.49.0
 psutil>=5.9
 httpx>=0.27.0
+h2>=4.0
 flet>=0.25.0
 PyPDF2>=3.0
@@ -26,3 +27,7 @@ pyobjc-framework-Quartz>=10.0
 pyobjc-framework-ScreenCaptureKit>=10.0
 pyobjc-framework-CoreMedia>=10.0
 pyobjc-framework-libdispatch>=10.0
+[dev]
+pytest>=8.0
+pytest-asyncio>=0.23

ai_interview_assistant-2.2.2/tests/test_llm_clients.py ADDED Viewed

@@ -0,0 +1,257 @@
+"""Unit tests for ai_interview.llm_clients (no network).
+Covers the acceptance criteria in docs/perf/SPEC-llm-connection-reuse.md:
+connection reuse, key-rotation close, OAuth header handling, HTTP/2 fallback,
+and best-effort keepalive/prewarm/close_all.
+"""
+from __future__ import annotations
+import asyncio
+from types import SimpleNamespace
+import pytest
+from ai_interview import llm_clients
+@pytest.fixture(autouse=True)
+def _reset_module_state():
+    """Give every test a pristine view of llm_clients' module-level state.
+
+    The client cache, the pending-close collection and the two
+    ``_h2_*_logged`` flags are reset before the test runs and again after it
+    finishes, so nothing a test mutates can leak into a later test or into
+    another test module that does not use this fixture.
+    """
+    def _wipe():
+        llm_clients._CLIENTS.clear()
+        llm_clients._PENDING_CLOSES.clear()
+        llm_clients._h2_unavailable_logged = False
+        llm_clients._h2_enabled_logged = False
+    _wipe()
+    yield
+    # Teardown mirrors setup: the HTTP/1.1 fallback test flips
+    # _h2_unavailable_logged to True, and that must not outlive the test.
+    _wipe()
+class FakeClient:
+    """Test double for an SDK client.
+
+    Remembers the key it was built with and whether ``aclose()`` has been
+    awaited, so tests can assert on teardown.
+    """
+    def __init__(self, key="k"):
+        # Starts open; only aclose() flips the flag.
+        self.closed = False
+        self.key = key
+    async def aclose(self):
+        """Mark this fake as closed."""
+        self.closed = True
+# --------------------------------------------------------------------------
+# Reuse + key rotation
+# --------------------------------------------------------------------------
+def test_anthropic_client_reused_for_same_key(monkeypatch):
+    """Two lookups with the same key share one client and build only once."""
+    built_keys = []
+    # list.append returns None, so the `or` always yields the new FakeClient.
+    monkeypatch.setattr(
+        llm_clients,
+        "_build_anthropic",
+        lambda key: built_keys.append(key) or FakeClient(key),
+    )
+    first = llm_clients.get_anthropic_client("k1")
+    second = llm_clients.get_anthropic_client("k1")
+    assert first is second, "same key must reuse the cached client"
+    assert built_keys == ["k1"], "client must be built exactly once"
+def test_gemini_client_reused_for_same_key(monkeypatch):
+    """Two lookups with the same Gemini key share one client and build once."""
+    built_keys = []
+    def _recording_build(key):
+        # Record every build request so the test can count them.
+        built_keys.append(key)
+        return FakeClient(key)
+    monkeypatch.setattr(llm_clients, "_build_gemini", _recording_build)
+    first = llm_clients.get_gemini_client("g1")
+    second = llm_clients.get_gemini_client("g1")
+    assert first is second
+    assert built_keys == ["g1"]
+def test_key_rotation_rebuilds_and_closes_stale(monkeypatch):
+    """A different key yields a new client, closes the old one, updates the cache."""
+    def _fresh_client(key):
+        return FakeClient(key)
+    monkeypatch.setattr(llm_clients, "_build_anthropic", _fresh_client)
+    stale = llm_clients.get_anthropic_client("k1")
+    # no running loop -> sync close
+    fresh = llm_clients.get_anthropic_client("k2")
+    assert fresh is not stale, "rotated key must build a fresh client"
+    assert stale.closed is True, "stale client must be closed on rotation"
+    assert llm_clients._CLIENTS["anthropic"] == ("k2", fresh)
+# --------------------------------------------------------------------------
+# OAuth header handling (must match _stream_claude's prior behavior)
+# --------------------------------------------------------------------------
+def test_build_anthropic_oauth_uses_bearer_header(monkeypatch):
+    """An ``sk-ant-oat`` key is sent as a Bearer header with a placeholder api_key."""
+    import anthropic
+    ctor_kwargs = {}
+    class _RecordingAnthropic:
+        """Captures the keyword arguments the builder passes to the SDK."""
+        def __init__(self, **kwargs):
+            ctor_kwargs.update(kwargs)
+    def _stub_httpx():
+        return "HTTPX"
+    monkeypatch.setattr(llm_clients, "_httpx_client", _stub_httpx)
+    monkeypatch.setattr(anthropic, "AsyncAnthropic", _RecordingAnthropic)
+    llm_clients._build_anthropic("sk-ant-oat-secret")
+    assert ctor_kwargs["api_key"] == "placeholder"
+    headers = ctor_kwargs["default_headers"]
+    assert headers["Authorization"] == "Bearer sk-ant-oat-secret"
+    assert ctor_kwargs["http_client"] == "HTTPX"
+def test_build_anthropic_normal_key_no_bearer(monkeypatch):
+    """A regular API key is passed as ``api_key`` with no ``default_headers``."""
+    import anthropic
+    ctor_kwargs = {}
+    class _RecordingAnthropic:
+        """Captures the keyword arguments the builder passes to the SDK."""
+        def __init__(self, **kwargs):
+            ctor_kwargs.update(kwargs)
+    def _stub_httpx():
+        return "HTTPX"
+    monkeypatch.setattr(llm_clients, "_httpx_client", _stub_httpx)
+    monkeypatch.setattr(anthropic, "AsyncAnthropic", _RecordingAnthropic)
+    llm_clients._build_anthropic("sk-ant-api03-real")
+    assert ctor_kwargs["api_key"] == "sk-ant-api03-real"
+    assert "default_headers" not in ctor_kwargs
+    assert ctor_kwargs["http_client"] == "HTTPX"
+# --------------------------------------------------------------------------
+# HTTP/2 with graceful HTTP/1.1 fallback
+# --------------------------------------------------------------------------
+def test_httpx_client_requests_http2(monkeypatch):
+    """The shared httpx client asks for HTTP/2 and 10 keepalive connections."""
+    import httpx
+    ctor_calls = []
+    class _RecordingAsyncClient:
+        """Stores the kwargs of every construction attempt."""
+        def __init__(self, **kwargs):
+            ctor_calls.append(kwargs)
+    monkeypatch.setattr(httpx, "AsyncClient", _RecordingAsyncClient)
+    llm_clients._httpx_client()
+    latest = ctor_calls[-1]
+    assert latest["http2"] is True
+    assert latest["limits"].max_keepalive_connections == 10
+def test_httpx_client_falls_back_to_http1_when_h2_missing(monkeypatch):
+    """If HTTP/2 construction raises ImportError, the client retries without it."""
+    import httpx
+    accepted = []
+    class _NoH2AsyncClient:
+        """Accepts only non-HTTP/2 construction, as httpx does without 'h2'."""
+        def __init__(self, **kwargs):
+            if not kwargs.get("http2"):
+                accepted.append(kwargs)
+                return
+            raise ImportError("Using http2=True, but the 'h2' package is not installed")
+    monkeypatch.setattr(httpx, "AsyncClient", _NoH2AsyncClient)
+    # must not raise
+    llm_clients._httpx_client()
+    latest = accepted[-1]
+    assert latest["http2"] is False, "must fall back to HTTP/1.1 when h2 missing"
+    assert llm_clients._h2_unavailable_logged is True
+# --------------------------------------------------------------------------
+# Keepalive — best-effort
+# --------------------------------------------------------------------------
+def test_keepalive_ping_empty_cache_is_noop():
+    """With no cached clients, keepalive_ping() completes without raising."""
+    ping = llm_clients.keepalive_ping()
+    # no clients -> must not raise
+    asyncio.run(ping)
+def test_keepalive_ping_swallows_failing_client():
+    """A cached client whose ``models.list()`` raises must not break the ping."""
+    async def _explode():
+        raise RuntimeError("network down")
+    class Boom:
+        """Client double whose models.list() coroutine always fails."""
+        class models:
+            list = staticmethod(_explode)
+    failing_client = Boom()
+    llm_clients._CLIENTS["anthropic"] = ("k", failing_client)
+    # must swallow, not raise
+    asyncio.run(llm_clients.keepalive_ping())
+def test_keepalive_ping_calls_models_list():
+    """keepalive_ping() awaits ``models.list()`` once on the cached client."""
+    pings = []
+    class Good:
+        """Client double that records each models.list() call."""
+        class models:
+            @staticmethod
+            async def list():
+                pings.append(1)
+    llm_clients._CLIENTS["anthropic"] = ("k", Good())
+    asyncio.run(llm_clients.keepalive_ping())
+    assert len(pings) == 1
+# --------------------------------------------------------------------------
+# prewarm — best-effort, only configured keys
+# --------------------------------------------------------------------------
+def test_prewarm_builds_only_configured_keys(monkeypatch):
+    """prewarm() requests a client only for providers with a non-empty key."""
+    requested = []
+    def _recorder(tag):
+        # Build a getter stand-in that logs (tag, key) and returns None.
+        return lambda k: requested.append((tag, k))
+    async def _skip_ping():
+        return None
+    monkeypatch.setattr(llm_clients, "get_anthropic_client", _recorder("a"))
+    monkeypatch.setattr(llm_clients, "get_gemini_client", _recorder("g"))
+    monkeypatch.setattr(llm_clients, "keepalive_ping", _skip_ping)
+    config = SimpleNamespace(anthropic_api_key="k1", google_api_key="")
+    asyncio.run(llm_clients.prewarm(config))
+    assert requested == [("a", "k1")], "only the configured anthropic key should build"
+def test_prewarm_tolerates_raising_builder(monkeypatch):
+    """prewarm() must complete even when both client getters raise."""
+    def boom(_k):
+        raise RuntimeError("bad key")
+    async def _skip_ping():
+        return None
+    for getter_name in ("get_anthropic_client", "get_gemini_client"):
+        monkeypatch.setattr(llm_clients, getter_name, boom)
+    monkeypatch.setattr(llm_clients, "keepalive_ping", _skip_ping)
+    config = SimpleNamespace(anthropic_api_key="k1", google_api_key="g1")
+    # must not raise
+    asyncio.run(llm_clients.prewarm(config))
+# --------------------------------------------------------------------------
+# close_all
+# --------------------------------------------------------------------------
+def test_close_all_closes_and_clears():
+    """close_all() closes every cached client and leaves the cache empty."""
+    anthropic_fake = FakeClient("a")
+    google_fake = FakeClient("g")
+    llm_clients._CLIENTS["anthropic"] = ("ka", anthropic_fake)
+    llm_clients._CLIENTS["google"] = ("kg", google_fake)
+    asyncio.run(llm_clients.close_all())
+    assert all(fake.closed for fake in (anthropic_fake, google_fake))
+    assert llm_clients._CLIENTS == {}
+def test_close_all_swallows_failing_close():
+    """A client whose aclose() raises must not stop close_all() emptying the cache."""
+    class BadClose:
+        """Client double whose teardown always fails."""
+        async def aclose(self):
+            raise RuntimeError("close blew up")
+    broken_client = BadClose()
+    llm_clients._CLIENTS["anthropic"] = ("k", broken_client)
+    # must swallow
+    asyncio.run(llm_clients.close_all())
+    assert llm_clients._CLIENTS == {}