ai-interview-assistant 2.2.0__tar.gz → 2.2.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/PKG-INFO +5 -1
  2. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/pyproject.toml +8 -1
  3. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/__init__.py +1 -1
  4. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/ai_client.py +10 -10
  5. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/config.py +32 -5
  6. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/daemon.py +26 -0
  7. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/screens/dashboard.py +86 -34
  8. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/i18n.py +2 -1
  9. ai_interview_assistant-2.2.2/src/ai_interview/llm_clients.py +225 -0
  10. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview_assistant.egg-info/PKG-INFO +5 -1
  11. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview_assistant.egg-info/SOURCES.txt +3 -1
  12. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview_assistant.egg-info/requires.txt +5 -0
  13. ai_interview_assistant-2.2.2/tests/test_llm_clients.py +257 -0
  14. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/README.md +0 -0
  15. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/setup.cfg +0 -0
  16. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/__main__.py +0 -0
  17. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/audio/__init__.py +0 -0
  18. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/audio/capture.py +0 -0
  19. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/audio/transcriber.py +0 -0
  20. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/buffer.py +0 -0
  21. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/cli.py +0 -0
  22. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/__init__.py +0 -0
  23. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/__main__.py +0 -0
  24. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/app.py +0 -0
  25. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/screens/__init__.py +0 -0
  26. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/screens/hotkeys.py +0 -0
  27. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/screens/scripts.py +0 -0
  28. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/screens/settings.py +0 -0
  29. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/hotkey_config.py +0 -0
  30. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/hotkeys.py +0 -0
  31. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/menubar.py +0 -0
  32. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/metrics.py +0 -0
  33. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/ollama_utils.py +0 -0
  34. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/overlay.py +0 -0
  35. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/screenshot.py +0 -0
  36. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/server/__init__.py +0 -0
  37. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/server/app.py +0 -0
  38. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/server/routes.py +0 -0
  39. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/server/websocket.py +0 -0
  40. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/state.py +0 -0
  41. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/utils.py +0 -0
  42. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/watchdog.py +0 -0
  43. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview_assistant.egg-info/dependency_links.txt +0 -0
  44. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview_assistant.egg-info/entry_points.txt +0 -0
  45. {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview_assistant.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-interview-assistant
3
- Version: 2.2.0
3
+ Version: 2.2.2
4
4
  Summary: Ghost background AI assistant for live code challenges
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: click>=8.0
@@ -27,5 +27,9 @@ Requires-Dist: simple-term-menu>=1.6.0
27
27
  Requires-Dist: datadog>=0.49.0
28
28
  Requires-Dist: psutil>=5.9
29
29
  Requires-Dist: httpx>=0.27.0
30
+ Requires-Dist: h2>=4.0
30
31
  Requires-Dist: flet>=0.25.0
31
32
  Requires-Dist: PyPDF2>=3.0
33
+ Provides-Extra: dev
34
+ Requires-Dist: pytest>=8.0; extra == "dev"
35
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ai-interview-assistant"
7
- version = "2.2.0"
7
+ version = "2.2.2"
8
8
  description = "Ghost background AI assistant for live code challenges"
9
9
  requires-python = ">=3.11"
10
10
  dependencies = [
@@ -32,10 +32,17 @@ dependencies = [
32
32
  "datadog>=0.49.0",
33
33
  "psutil>=5.9",
34
34
  "httpx>=0.27.0",
35
+ "h2>=4.0",
35
36
  "flet>=0.25.0",
36
37
  "PyPDF2>=3.0",
37
38
  ]
38
39
 
40
+ [project.optional-dependencies]
41
+ dev = [
42
+ "pytest>=8.0",
43
+ "pytest-asyncio>=0.23",
44
+ ]
45
+
39
46
  [project.scripts]
40
47
  ai-interview = "ai_interview.cli:cli"
41
48
 
@@ -1,3 +1,3 @@
1
1
  """AI Interview Assistant — ghost background tool for live code challenges."""
2
2
 
3
- __version__ = "2.2.0"
3
+ __version__ = "2.2.2"
@@ -139,16 +139,13 @@ async def _stream_claude(state: "AppState", config: "Config", messages: list, cu
139
139
  """Stream response from Claude API with retry on transient errors."""
140
140
  from ai_interview.server.websocket import broadcast
141
141
  from ai_interview.metrics import metrics
142
+ from ai_interview import llm_clients
142
143
  import anthropic
143
144
 
144
- is_oauth = config.anthropic_api_key.startswith("sk-ant-oat")
145
- if is_oauth:
146
- client = anthropic.AsyncAnthropic(
147
- api_key="placeholder",
148
- default_headers={"Authorization": f"Bearer {config.anthropic_api_key}"},
149
- )
150
- else:
151
- client = anthropic.AsyncAnthropic(api_key=config.anthropic_api_key)
145
+ # Shared, connection-pooled HTTP/2 client, reused across queries so query 2+
146
+ # skip the DNS+TCP+TLS handshake. OAuth (sk-ant-oat) handling lives in the
147
+ # builder. Do NOT close it here — it is shared and closed at daemon shutdown.
148
+ client = llm_clients.get_anthropic_client(config.anthropic_api_key)
152
149
 
153
150
  full_text = ""
154
151
  _t0 = time.monotonic()
@@ -262,10 +259,13 @@ async def _stream_gemini(state: "AppState", config: "Config", messages: list, cu
262
259
  _system_prompt = system_prompt or config.system_prompt
263
260
 
264
261
  try:
265
- from google import genai
266
262
  from google.genai import types
263
+ from ai_interview import llm_clients
267
264
 
268
- client = genai.Client(api_key=config.google_api_key)
265
+ # Shared client reused across queries (connection reuse; google-genai
266
+ # accepts no custom httpx client, so no HTTP/2). Serialized query model +
267
+ # idle-gated keepalive mean it is never used concurrently.
268
+ client = llm_clients.get_gemini_client(config.google_api_key)
269
269
  contents = _messages_to_gemini_contents(messages, _system_prompt)
270
270
 
271
271
  # Run synchronous streaming in a thread to avoid blocking asyncio
@@ -188,6 +188,25 @@ def _read_cv_text(cv_path: str) -> str:
188
188
  return ""
189
189
 
190
190
 
191
+ def _assemble_with_context(base_prompt: str, codebase: str, cv_text: str) -> str:
192
+ """Append codebase and CV context blocks to a user-supplied base prompt."""
193
+ prompt = base_prompt
194
+ if codebase:
195
+ prompt += (
196
+ "\n\n---\nThe interview is about the following codebase. "
197
+ "Use it to answer questions about the code directly.\n\n"
198
+ + codebase
199
+ )
200
+ if cv_text:
201
+ prompt += (
202
+ "\n\nCandidate's CV/Resume:\n"
203
+ f"{cv_text}\n\n"
204
+ "When answering behavioural or experience questions, reference real projects "
205
+ "and roles from this CV. Keep answers first-person as if the candidate is speaking.\n"
206
+ )
207
+ return prompt
208
+
209
+
191
210
  def _build_system_prompt(
192
211
  language: str = "",
193
212
  challenge_type: str = "",
@@ -306,6 +325,7 @@ class Config:
306
325
  whisper_host: str = "" # Remote faster-whisper-server URL (e.g. http://192.168.1.50:8000)
307
326
  show_menubar_icon: bool = True
308
327
  scripts: dict = field(default_factory=dict)
328
+ user_system_prompt: str = "" # user-edited base prompt from dashboard preview
309
329
  system_prompt: str = field(default="")
310
330
 
311
331
  @property
@@ -340,11 +360,17 @@ class Config:
340
360
  if not self.system_prompt:
341
361
  codebase = read_codebase_context(self.context_path) if self.context_path else ""
342
362
  cv_text = _read_cv_text(self.cv_path)
343
- self.system_prompt = _build_system_prompt(
344
- self.interview_language, self.challenge_type, codebase,
345
- self.role_context, self.transcription_language, cv_text,
346
- scripts=self.scripts,
347
- )
363
+ if self.user_system_prompt:
364
+ # User customised the base prompt — use it and still inject codebase/CV
365
+ self.system_prompt = _assemble_with_context(
366
+ self.user_system_prompt, codebase, cv_text
367
+ )
368
+ else:
369
+ self.system_prompt = _build_system_prompt(
370
+ self.interview_language, self.challenge_type, codebase,
371
+ self.role_context, self.transcription_language, cv_text,
372
+ scripts=self.scripts,
373
+ )
348
374
 
349
375
  @classmethod
350
376
  def from_saved(cls, overrides: Optional[dict] = None) -> "Config":
@@ -392,4 +418,5 @@ class Config:
392
418
  whisper_host=merged.get("whisper_host", ""),
393
419
  show_menubar_icon=merged.get("show_menubar_icon", True),
394
420
  scripts=merged.get("scripts", {}),
421
+ user_system_prompt=merged.get("user_system_prompt", ""),
395
422
  )
@@ -279,10 +279,19 @@ def run_daemon(config: Config) -> None:
279
279
  await site.start()
280
280
  logger.info("Viewer server on 0.0.0.0:%d", config.port)
281
281
 
282
+ # Pre-warm pooled LLM connections (HTTP/2 + keep-alive) so the very first
283
+ # query skips the DNS+TCP+TLS handshake. Best-effort — a failure never aborts start.
284
+ from ai_interview import llm_clients
285
+ try:
286
+ await llm_clients.prewarm(config)
287
+ except Exception as exc:
288
+ logger.info("LLM prewarm failed (best-effort): %s", exc)
289
+
282
290
  _MAX_IDLE_S = 30 * 60 # 30 min idle → auto-shutdown
283
291
  state.last_activity_at = time.time() # initialise so first check is fair
284
292
  _last_heartbeat_uptime = -1
285
293
  _last_cpu_broadcast = 0.0
294
+ _last_llm_keepalive = time.time() # prewarm just pinged; next ping in ~30s
286
295
 
287
296
  # Prime psutil baseline — first call always returns 0.0, real values start from second call
288
297
  try:
@@ -323,11 +332,28 @@ def run_daemon(config: Config) -> None:
323
332
  except Exception:
324
333
  pass
325
334
 
335
+ # Keep pooled LLM sockets warm (~30s cadence) so a query after a long
336
+ # idle gap doesn't pay a fresh handshake. Only ping while idle: an
337
+ # in-flight query already keeps the connection hot, and skipping then
338
+ # avoids touching the shared client concurrently. Fire-and-forget so
339
+ # the 0.5s health loop never blocks on the network.
340
+ if now - _last_llm_keepalive >= 30:
341
+ _last_llm_keepalive = now
342
+ _aq = state.active_query
343
+ if _aq is None or _aq.done():
344
+ asyncio.ensure_future(llm_clients.keepalive_ping())
345
+
326
346
  # Auto-shutdown after 30 min of no hotkeys / queries / transcripts
327
347
  if idle >= _MAX_IDLE_S:
328
348
  logger.info("30-minute idle limit reached — shutting down automatically")
329
349
  shutdown_flag["stop"] = True
330
350
 
351
+ # Close pooled LLM clients in the loop they were created in (best-effort).
352
+ try:
353
+ await llm_clients.close_all()
354
+ except Exception as exc:
355
+ logger.info("LLM client close failed (best-effort): %s", exc)
356
+
331
357
  await runner.cleanup()
332
358
 
333
359
  try:
@@ -151,15 +151,32 @@ class DashboardScreen(ft.Column):
151
151
 
152
152
  # ---- Prompt preview ----
153
153
  self._prompt_preview_expanded = False
154
- self._prompt_preview = ft.Text(
155
- "",
156
- size=11,
157
- italic=True,
158
- color=MUTED,
159
- selectable=False,
154
+ self._preview_manually_edited = False
155
+ self._prompt_preview = ft.TextField(
156
+ value="",
157
+ multiline=True,
158
+ min_lines=8,
159
+ max_lines=20,
160
+ text_size=11,
161
+ border_radius=8,
162
+ border_color="#2a2a2a",
163
+ bgcolor="#111111",
164
+ content_padding=ft.padding.all(12),
165
+ text_style=ft.TextStyle(italic=True, color=MUTED),
160
166
  visible=False,
167
+ expand=True,
168
+ on_change=lambda _: setattr(self, "_preview_manually_edited", True),
161
169
  )
170
+ self._preview_notes = ft.Text("", size=10, color=MUTED, italic=True, visible=False)
162
171
  self._preview_chevron = ft.Text("▶", size=11, color=MUTED)
172
+ self._regenerate_btn = ft.IconButton(
173
+ icon=ft.Icons.REFRESH,
174
+ icon_size=14,
175
+ icon_color=MUTED,
176
+ tooltip=t("regenerate_preview"),
177
+ on_click=self._regenerate_preview,
178
+ visible=False,
179
+ )
163
180
  self._update_prompt_preview()
164
181
 
165
182
  self._lang_field.visible = challenge in ("code_challenge", "system_design")
@@ -218,19 +235,23 @@ class DashboardScreen(ft.Column):
218
235
  transcription_card,
219
236
  self._cv_check,
220
237
  ft.Column([
221
- ft.TextButton(
222
- content=ft.Row(
223
- [
224
- ft.Text(t("prompt_preview_label"), size=11, color=MUTED),
225
- self._preview_chevron,
226
- ],
227
- spacing=4,
228
- tight=True,
238
+ ft.Row([
239
+ ft.TextButton(
240
+ content=ft.Row(
241
+ [
242
+ ft.Text(t("prompt_preview_label"), size=11, color=MUTED),
243
+ self._preview_chevron,
244
+ ],
245
+ spacing=4,
246
+ tight=True,
247
+ ),
248
+ on_click=self._toggle_prompt_preview,
249
+ style=ft.ButtonStyle(padding=ft.padding.all(0), overlay_color=ft.Colors.TRANSPARENT),
229
250
  ),
230
- on_click=self._toggle_prompt_preview,
231
- style=ft.ButtonStyle(padding=ft.padding.all(0), overlay_color=ft.Colors.TRANSPARENT),
232
- ),
251
+ self._regenerate_btn,
252
+ ], spacing=0, vertical_alignment=ft.CrossAxisAlignment.CENTER),
233
253
  self._prompt_preview,
254
+ self._preview_notes,
234
255
  ], spacing=6),
235
256
  ft.Row([self._start_btn, self._start_status], spacing=12),
236
257
  ], spacing=12)
@@ -333,6 +354,15 @@ class DashboardScreen(ft.Column):
333
354
  self._cv_check.value = bool(cv_name)
334
355
  self._cv_check.disabled = not bool(cv_name)
335
356
 
357
+ # Restore saved prompt if present, otherwise regenerate
358
+ saved_prompt = fresh.get("user_system_prompt", "")
359
+ if saved_prompt:
360
+ self._prompt_preview.value = saved_prompt
361
+ self._preview_manually_edited = True
362
+ else:
363
+ self._preview_manually_edited = False
364
+ self._update_prompt_preview()
365
+
336
366
  self._update_model_options()
337
367
  try:
338
368
  self._page.update()
@@ -408,11 +438,24 @@ class DashboardScreen(ft.Column):
408
438
 
409
439
  def _toggle_prompt_preview(self, _e=None):
410
440
  self._prompt_preview_expanded = not self._prompt_preview_expanded
411
- self._prompt_preview.visible = self._prompt_preview_expanded
412
- self._preview_chevron.value = "▼" if self._prompt_preview_expanded else "▶"
441
+ expanded = self._prompt_preview_expanded
442
+ self._prompt_preview.visible = expanded
443
+ self._preview_notes.visible = expanded and bool(self._preview_notes.value)
444
+ self._regenerate_btn.visible = expanded
445
+ self._preview_chevron.value = "▼" if expanded else "▶"
413
446
  try:
414
447
  self._preview_chevron.update()
415
448
  self._prompt_preview.update()
449
+ self._preview_notes.update()
450
+ self._regenerate_btn.update()
451
+ except Exception:
452
+ pass
453
+
454
+ def _regenerate_preview(self, _e=None):
455
+ self._preview_manually_edited = False
456
+ self._update_prompt_preview()
457
+ try:
458
+ self._page.update()
416
459
  except Exception:
417
460
  pass
418
461
 
@@ -420,33 +463,41 @@ class DashboardScreen(ft.Column):
420
463
  """Rebuild the system prompt preview from current field values."""
421
464
  if not hasattr(self, "_cv_check") or not hasattr(self, "_prompt_preview"):
422
465
  return
423
- challenge = self._challenge_dropdown.value or "code_challenge"
424
- language = self._lang_field.value or ""
425
- brief = self._brief_field.value or ""
466
+
467
+ # Rebuild notes regardless of manual edit state
426
468
  context_path = (self._context_path.value or "").strip()
427
469
  cv_selected = self._cv_check.value
470
+ notes = []
471
+ if context_path:
472
+ notes.append(f"+ codebase context will be injected from: {context_path}")
473
+ if cv_selected and self._config.get("cv_path"):
474
+ notes.append(f"+ CV will be injected: {os.path.basename(self._config['cv_path'])}")
475
+ self._preview_notes.value = "\n".join(notes)
476
+ self._preview_notes.visible = self._prompt_preview_expanded and bool(notes)
477
+
478
+ if self._preview_manually_edited:
479
+ # User has edited the prompt — don't overwrite, just refresh notes
480
+ if self._prompt_preview_expanded:
481
+ try:
482
+ self._preview_notes.update()
483
+ except Exception:
484
+ pass
485
+ return
428
486
 
429
487
  scripts = {**DEFAULT_SCRIPTS, **self._config.get("scripts", {})}
430
488
  prompt = _build_system_prompt(
431
- language=language,
432
- challenge_type=challenge,
489
+ language=self._lang_field.value or "",
490
+ challenge_type=self._challenge_dropdown.value or "code_challenge",
433
491
  codebase_context="",
434
- role_context=brief,
492
+ role_context=self._brief_field.value or "",
435
493
  scripts=scripts,
436
494
  )
437
-
438
- notes = []
439
- if context_path:
440
- notes.append(f"+ codebase context from: {context_path}")
441
- if cv_selected and self._config.get("cv_path"):
442
- notes.append(f"+ CV: {os.path.basename(self._config['cv_path'])}")
443
- if notes:
444
- prompt += "\n\n" + "\n".join(notes)
445
-
446
495
  self._prompt_preview.value = prompt
496
+
447
497
  if self._prompt_preview_expanded:
448
498
  try:
449
499
  self._prompt_preview.update()
500
+ self._preview_notes.update()
450
501
  except Exception:
451
502
  pass
452
503
 
@@ -540,6 +591,7 @@ class DashboardScreen(ft.Column):
540
591
  config["challenge_type"] = self._challenge_dropdown.value or "code_challenge"
541
592
  config["transcription_language"] = self._trans_lang.value or "auto"
542
593
  config["context_path"] = (self._context_path.value or "").strip()
594
+ config["user_system_prompt"] = (self._prompt_preview.value or "").strip()
543
595
 
544
596
  # Per-session CV toggle — don't permanently clear the path
545
597
  if not self._cv_check.value:
@@ -155,7 +155,8 @@ _STRINGS = {
155
155
  "type_press_hold": "Tap + Hold",
156
156
 
157
157
  # Dashboard prompt preview
158
- "prompt_preview_label": "What will be sent to the AI (preview):",
158
+ "prompt_preview_label": "What will be sent to the AI:",
159
+ "regenerate_preview": "Regenerate from current selections",
159
160
 
160
161
  # AI Scripts
161
162
  "save": "Save",
@@ -0,0 +1,225 @@
1
+ """Shared, long-lived LLM SDK clients (connection reuse + HTTP/2 + heartbeat).
2
+
3
+ Every AI query used to build a fresh SDK client (``ai_client._stream_claude`` /
4
+ ``_stream_gemini``), paying a new DNS + TCP + TLS handshake on its first request.
5
+ On a high-latency link that handshake dominates time-to-first-token. Deepgram
6
+ never has this problem because it holds one persistent websocket for the whole
7
+ session.
8
+
9
+ This module mirrors that idea: one connection-pooled client per provider, reused
10
+ across every query in a daemon session. Query 1 pays the handshake; query 2+ skip
11
+ it. The Anthropic transport additionally uses HTTP/2 (multiplexing + the
12
+ connection stays open), and a periodic keep-alive heartbeat keeps the pooled
13
+ socket warm so it never idles out mid-session.
14
+
15
+ Design notes / invariants:
16
+ - TLS verification stays ON (this repo verifies via certifi; do NOT add
17
+ ``verify=False`` here).
18
+ - No top-level imports of ``ai_interview.*`` or of audio libraries — keep this
19
+ importable both before and after the daemon fork. SDKs are imported lazily.
20
+ - The daemon runs a single asyncio loop and serializes queries (a new query
21
+ cancels the in-flight one), so a shared client is never used by two queries at
22
+ once. The caller gates ``keepalive_ping`` on "no active query".
23
+ """
24
+ from __future__ import annotations
25
+
26
+ import inspect
27
+ import logging
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+ # provider -> (api_key, client_object)
32
+ _CLIENTS: dict[str, tuple[str, object]] = {}
33
+ # Strong refs to in-flight rotation-close tasks, so a fire-and-forget close is
34
+ # not GC'd before it runs (and so close_all can drain them at shutdown).
35
+ _PENDING_CLOSES: set = set()
36
+ # Latched so the HTTP/2 status (on, or downgraded to 1.1) is logged exactly once.
37
+ _h2_unavailable_logged = False
38
+ _h2_enabled_logged = False
39
+
40
+
41
+ def _httpx_client():
42
+ """A connection-pooled async httpx client tuned to stay warm across the gaps
43
+ between queries. HTTP/2 multiplexes and keeps the connection open;
44
+ ``keepalive_expiry`` outlives a typical idle gap.
45
+
46
+ HTTP/2 needs the optional ``h2`` package. If it is not installed, httpx raises
47
+ ``ImportError`` when ``http2=True``; we degrade to HTTP/1.1 (still pooled +
48
+ keep-alive) instead of breaking queries.
49
+ """
50
+ global _h2_unavailable_logged, _h2_enabled_logged
51
+ import httpx
52
+
53
+ limits = httpx.Limits(max_keepalive_connections=10, keepalive_expiry=300.0)
54
+ try:
55
+ client = httpx.AsyncClient(http2=True, limits=limits)
56
+ if not _h2_enabled_logged:
57
+ _h2_enabled_logged = True
58
+ logger.info("LLM transport: HTTP/2 enabled with connection reuse")
59
+ return client
60
+ except ImportError:
61
+ if not _h2_unavailable_logged:
62
+ _h2_unavailable_logged = True
63
+ logger.info(
64
+ "HTTP/2 unavailable (install 'h2' / httpx[http2]) — "
65
+ "using HTTP/1.1 with connection reuse"
66
+ )
67
+ return httpx.AsyncClient(http2=False, limits=limits)
68
+
69
+
70
+ def _build_anthropic(api_key: str):
71
+ """Build a pooled AsyncAnthropic, honoring the OAuth Bearer-token convention.
72
+
73
+ Formerly inline in ``ai_client._stream_claude``: an ``sk-ant-oat`` key is an OAuth token
74
+ sent via the Authorization header, not the api_key field.
75
+ """
76
+ import anthropic
77
+
78
+ if api_key.startswith("sk-ant-oat"):
79
+ return anthropic.AsyncAnthropic(
80
+ api_key="placeholder",
81
+ default_headers={"Authorization": f"Bearer {api_key}"},
82
+ http_client=_httpx_client(),
83
+ )
84
+ return anthropic.AsyncAnthropic(api_key=api_key, http_client=_httpx_client())
85
+
86
+
87
+ def _build_gemini(api_key: str):
88
+ """Build a reused genai.Client.
89
+
90
+ google-genai does not accept a custom httpx client, so Gemini gets connection
91
+ reuse only (no HTTP/2). Reuse alone still skips the per-query handshake.
92
+
93
+ This is a SYNC client. It is only ever touched from one place at a time: the
94
+ daemon serializes queries, and ``keepalive_ping`` is gated by the caller on
95
+ "no active query", so the client is never used concurrently.
96
+ """
97
+ from google import genai
98
+
99
+ return genai.Client(api_key=api_key)
100
+
101
+
102
+ def _get(provider: str, api_key: str, builder):
103
+ cached = _CLIENTS.get(provider)
104
+ if cached is not None and cached[0] == api_key:
105
+ return cached[1]
106
+ if cached is not None:
107
+ # Key rotated — drop the stale client and close it without blocking.
108
+ _schedule_close(cached[1])
109
+ client = builder(api_key)
110
+ _CLIENTS[provider] = (api_key, client)
111
+ return client
112
+
113
+
114
+ def get_anthropic_client(api_key: str):
115
+ return _get("anthropic", api_key, _build_anthropic)
116
+
117
+
118
+ def get_gemini_client(api_key: str):
119
+ return _get("google", api_key, _build_gemini)
120
+
121
+
122
+ async def _aclose(client) -> None:
123
+ """Best-effort close of an SDK client or raw httpx client. Tries async
124
+ ``aclose`` first, then ``close`` (awaiting it if it returns a coroutine)."""
125
+ try:
126
+ aclose = getattr(client, "aclose", None)
127
+ if aclose is not None:
128
+ await aclose()
129
+ return
130
+ close = getattr(client, "close", None)
131
+ if close is not None:
132
+ res = close()
133
+ if inspect.isawaitable(res):
134
+ await res
135
+ except Exception as exc:
136
+ logger.info("llm client close failed (best-effort): %s", exc)
137
+
138
+
139
+ def _schedule_close(client) -> None:
140
+ """Close a rotated-out client without blocking the caller. If an event loop is
141
+ running, schedule the async close; otherwise close synchronously."""
142
+ import asyncio
143
+
144
+ try:
145
+ loop = asyncio.get_running_loop()
146
+ except RuntimeError:
147
+ loop = None
148
+ if loop is not None:
149
+ task = loop.create_task(_aclose(client))
150
+ _PENDING_CLOSES.add(task)
151
+ task.add_done_callback(_PENDING_CLOSES.discard)
152
+ else:
153
+ # No running loop in this thread — close synchronously. Best-effort: a
154
+ # rotated-out client teardown must never break the caller's query path.
155
+ try:
156
+ asyncio.run(_aclose(client))
157
+ except Exception as exc:
158
+ logger.info("llm client sync close failed (best-effort): %s", exc)
159
+
160
+
161
+ async def close_all() -> None:
162
+ """Close every cached client. Called at daemon shutdown. Best-effort."""
163
+ clients = [c for _, (_, c) in _CLIENTS.items()]
164
+ _CLIENTS.clear()
165
+ for client in clients:
166
+ await _aclose(client)
167
+ # Drain any in-flight rotation-close tasks so the loop does not tear down with
168
+ # a pending close (avoids the 3.12 "Task was destroyed" warning).
169
+ if _PENDING_CLOSES:
170
+ import asyncio
171
+
172
+ await asyncio.gather(*list(_PENDING_CLOSES), return_exceptions=True)
173
+
174
+
175
+ # ---------------------------------------------------------------------------
176
+ # Keep-alive heartbeat
177
+ #
178
+ # httpx keeps an idle pooled socket only for `keepalive_expiry` (300s), and a
179
+ # provider/proxy/VPN often closes an idle keep-alive connection much sooner
180
+ # (~60-120s). Without traffic the socket goes cold mid-session and the next query
181
+ # pays a fresh handshake. Mirroring Deepgram's websocket heartbeat, while the
182
+ # daemon runs we send a cheap, no-token request (a model list) on each pooled
183
+ # client every ~30s so the socket never idles out. The daemon main loop drives the
184
+ # cadence and gates on "no active query".
185
+ # ---------------------------------------------------------------------------
186
+
187
+ async def _ping_client(provider: str, client) -> None:
188
+ """One cheap, no-token request that keeps the pooled socket warm. Uses the
189
+ provider's model-list endpoint (a GET, no completion, no tokens billed)."""
190
+ if provider == "anthropic":
191
+ await client.models.list()
192
+ elif provider == "google":
193
+ import asyncio
194
+
195
+ # google-genai's Client is synchronous; touch it off the event loop.
196
+ await asyncio.to_thread(lambda: next(iter(client.models.list()), None))
197
+
198
+
199
+ async def keepalive_ping() -> None:
200
+ """Touch every cached client's connection so the pooled TCP+TLS socket stays
201
+ warm. Best-effort: a failed ping (transient network, or a 401 on an OAuth
202
+ models endpoint) is logged at INFO and never raised — the connection is still
203
+ pooled, and a real query re-establishes it if needed."""
204
+ for provider, (_api_key, client) in list(_CLIENTS.items()):
205
+ try:
206
+ await _ping_client(provider, client)
207
+ except Exception as exc:
208
+ logger.info("llm keepalive ping failed for %s (best-effort): %s", provider, exc)
209
+
210
+
211
+ async def prewarm(config) -> None:
212
+ """Build and warm the pooled client for every provider that has a configured
213
+ key, so the FIRST query of a session skips the handshake too. Called once at
214
+ daemon startup. Best-effort per provider (a missing SDK or bad key never
215
+ breaks the others)."""
216
+ for getter, key in (
217
+ (get_anthropic_client, getattr(config, "anthropic_api_key", "") or ""),
218
+ (get_gemini_client, getattr(config, "google_api_key", "") or ""),
219
+ ):
220
+ if key:
221
+ try:
222
+ getter(key)
223
+ except Exception as exc:
224
+ logger.info("llm prewarm build failed (best-effort): %s", exc)
225
+ await keepalive_ping()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ai-interview-assistant
3
- Version: 2.2.0
3
+ Version: 2.2.2
4
4
  Summary: Ghost background AI assistant for live code challenges
5
5
  Requires-Python: >=3.11
6
6
  Requires-Dist: click>=8.0
@@ -27,5 +27,9 @@ Requires-Dist: simple-term-menu>=1.6.0
27
27
  Requires-Dist: datadog>=0.49.0
28
28
  Requires-Dist: psutil>=5.9
29
29
  Requires-Dist: httpx>=0.27.0
30
+ Requires-Dist: h2>=4.0
30
31
  Requires-Dist: flet>=0.25.0
31
32
  Requires-Dist: PyPDF2>=3.0
33
+ Provides-Extra: dev
34
+ Requires-Dist: pytest>=8.0; extra == "dev"
35
+ Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
@@ -10,6 +10,7 @@ src/ai_interview/daemon.py
10
10
  src/ai_interview/hotkey_config.py
11
11
  src/ai_interview/hotkeys.py
12
12
  src/ai_interview/i18n.py
13
+ src/ai_interview/llm_clients.py
13
14
  src/ai_interview/menubar.py
14
15
  src/ai_interview/metrics.py
15
16
  src/ai_interview/ollama_utils.py
@@ -38,4 +39,5 @@ src/ai_interview_assistant.egg-info/SOURCES.txt
38
39
  src/ai_interview_assistant.egg-info/dependency_links.txt
39
40
  src/ai_interview_assistant.egg-info/entry_points.txt
40
41
  src/ai_interview_assistant.egg-info/requires.txt
41
- src/ai_interview_assistant.egg-info/top_level.txt
42
+ src/ai_interview_assistant.egg-info/top_level.txt
43
+ tests/test_llm_clients.py
@@ -16,6 +16,7 @@ simple-term-menu>=1.6.0
16
16
  datadog>=0.49.0
17
17
  psutil>=5.9
18
18
  httpx>=0.27.0
19
+ h2>=4.0
19
20
  flet>=0.25.0
20
21
  PyPDF2>=3.0
21
22
 
@@ -26,3 +27,7 @@ pyobjc-framework-Quartz>=10.0
26
27
  pyobjc-framework-ScreenCaptureKit>=10.0
27
28
  pyobjc-framework-CoreMedia>=10.0
28
29
  pyobjc-framework-libdispatch>=10.0
30
+
31
+ [dev]
32
+ pytest>=8.0
33
+ pytest-asyncio>=0.23
@@ -0,0 +1,257 @@
1
+ """Unit tests for ai_interview.llm_clients (no network).
2
+
3
+ Covers the acceptance criteria in docs/perf/SPEC-llm-connection-reuse.md:
4
+ connection reuse, key-rotation close, OAuth header handling, HTTP/2 fallback,
5
+ and best-effort keepalive/prewarm/close_all.
6
+ """
7
+ from __future__ import annotations
8
+
9
+ import asyncio
10
+ from types import SimpleNamespace
11
+
12
+ import pytest
13
+
14
+ from ai_interview import llm_clients
15
+
16
+
17
+ @pytest.fixture(autouse=True)
18
+ def _reset_module_state():
19
+ llm_clients._CLIENTS.clear()
20
+ llm_clients._PENDING_CLOSES.clear()
21
+ llm_clients._h2_unavailable_logged = False
22
+ llm_clients._h2_enabled_logged = False
23
+ yield
24
+ llm_clients._CLIENTS.clear()
25
+ llm_clients._PENDING_CLOSES.clear()
26
+
27
+
28
+ class FakeClient:
29
+ """Stand-in SDK client with an async aclose() that records teardown."""
30
+
31
+ def __init__(self, key="k"):
32
+ self.key = key
33
+ self.closed = False
34
+
35
+ async def aclose(self):
36
+ self.closed = True
37
+
38
+
39
+ # --------------------------------------------------------------------------
40
+ # Reuse + key rotation
41
+ # --------------------------------------------------------------------------
42
+
43
+ def test_anthropic_client_reused_for_same_key(monkeypatch):
44
+ builds = []
45
+
46
+ def fake_build(key):
47
+ builds.append(key)
48
+ return FakeClient(key)
49
+
50
+ monkeypatch.setattr(llm_clients, "_build_anthropic", fake_build)
51
+
52
+ c1 = llm_clients.get_anthropic_client("k1")
53
+ c2 = llm_clients.get_anthropic_client("k1")
54
+
55
+ assert c1 is c2, "same key must reuse the cached client"
56
+ assert builds == ["k1"], "client must be built exactly once"
57
+
58
+
59
+ def test_gemini_client_reused_for_same_key(monkeypatch):
60
+ builds = []
61
+ monkeypatch.setattr(
62
+ llm_clients, "_build_gemini", lambda key: builds.append(key) or FakeClient(key)
63
+ )
64
+
65
+ c1 = llm_clients.get_gemini_client("g1")
66
+ c2 = llm_clients.get_gemini_client("g1")
67
+
68
+ assert c1 is c2
69
+ assert builds == ["g1"]
70
+
71
+
72
+ def test_key_rotation_rebuilds_and_closes_stale(monkeypatch):
73
+ monkeypatch.setattr(llm_clients, "_build_anthropic", lambda key: FakeClient(key))
74
+
75
+ old = llm_clients.get_anthropic_client("k1")
76
+ new = llm_clients.get_anthropic_client("k2") # no running loop -> sync close
77
+
78
+ assert new is not old, "rotated key must build a fresh client"
79
+ assert old.closed is True, "stale client must be closed on rotation"
80
+ assert llm_clients._CLIENTS["anthropic"] == ("k2", new)
81
+
82
+
83
+ # --------------------------------------------------------------------------
84
+ # OAuth header handling (must match _stream_claude's prior behavior)
85
+ # --------------------------------------------------------------------------
86
+
87
+ def test_build_anthropic_oauth_uses_bearer_header(monkeypatch):
88
+ import anthropic
89
+
90
+ captured = {}
91
+
92
+ class FakeAA:
93
+ def __init__(self, **kwargs):
94
+ captured.update(kwargs)
95
+
96
+ monkeypatch.setattr(anthropic, "AsyncAnthropic", FakeAA)
97
+ monkeypatch.setattr(llm_clients, "_httpx_client", lambda: "HTTPX")
98
+
99
+ llm_clients._build_anthropic("sk-ant-oat-secret")
100
+
101
+ assert captured["api_key"] == "placeholder"
102
+ assert captured["default_headers"]["Authorization"] == "Bearer sk-ant-oat-secret"
103
+ assert captured["http_client"] == "HTTPX"
104
+
105
+
106
+ def test_build_anthropic_normal_key_no_bearer(monkeypatch):
107
+ import anthropic
108
+
109
+ captured = {}
110
+
111
+ class FakeAA:
112
+ def __init__(self, **kwargs):
113
+ captured.update(kwargs)
114
+
115
+ monkeypatch.setattr(anthropic, "AsyncAnthropic", FakeAA)
116
+ monkeypatch.setattr(llm_clients, "_httpx_client", lambda: "HTTPX")
117
+
118
+ llm_clients._build_anthropic("sk-ant-api03-real")
119
+
120
+ assert captured["api_key"] == "sk-ant-api03-real"
121
+ assert "default_headers" not in captured
122
+ assert captured["http_client"] == "HTTPX"
123
+
124
+
125
+ # --------------------------------------------------------------------------
126
+ # HTTP/2 with graceful HTTP/1.1 fallback
127
+ # --------------------------------------------------------------------------
128
+
129
+ def test_httpx_client_requests_http2(monkeypatch):
130
+ import httpx
131
+
132
+ calls = []
133
+
134
+ class FakeAsyncClient:
135
+ def __init__(self, **kwargs):
136
+ calls.append(kwargs)
137
+
138
+ monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient)
139
+
140
+ llm_clients._httpx_client()
141
+
142
+ assert calls[-1]["http2"] is True
143
+ assert calls[-1]["limits"].max_keepalive_connections == 10
144
+
145
+
146
+ def test_httpx_client_falls_back_to_http1_when_h2_missing(monkeypatch):
147
+ import httpx
148
+
149
+ calls = []
150
+
151
+ class FakeAsyncClient:
152
+ def __init__(self, **kwargs):
153
+ if kwargs.get("http2"):
154
+ raise ImportError("Using http2=True, but the 'h2' package is not installed")
155
+ calls.append(kwargs)
156
+
157
+ monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient)
158
+
159
+ llm_clients._httpx_client() # must not raise
160
+
161
+ assert calls[-1]["http2"] is False, "must fall back to HTTP/1.1 when h2 missing"
162
+ assert llm_clients._h2_unavailable_logged is True
163
+
164
+
165
+ # --------------------------------------------------------------------------
166
+ # Keepalive — best-effort
167
+ # --------------------------------------------------------------------------
168
+
169
+ def test_keepalive_ping_empty_cache_is_noop():
170
+ asyncio.run(llm_clients.keepalive_ping()) # no clients -> must not raise
171
+
172
+
173
+ def test_keepalive_ping_swallows_failing_client():
174
+ class Boom:
175
+ class models:
176
+ @staticmethod
177
+ async def list():
178
+ raise RuntimeError("network down")
179
+
180
+ llm_clients._CLIENTS["anthropic"] = ("k", Boom())
181
+ asyncio.run(llm_clients.keepalive_ping()) # must swallow, not raise
182
+
183
+
184
+ def test_keepalive_ping_calls_models_list():
185
+ hits = {"n": 0}
186
+
187
+ class Good:
188
+ class models:
189
+ @staticmethod
190
+ async def list():
191
+ hits["n"] += 1
192
+
193
+ llm_clients._CLIENTS["anthropic"] = ("k", Good())
194
+ asyncio.run(llm_clients.keepalive_ping())
195
+ assert hits["n"] == 1
196
+
197
+
198
+ # --------------------------------------------------------------------------
199
+ # prewarm — best-effort, only configured keys
200
+ # --------------------------------------------------------------------------
201
+
202
+ def test_prewarm_builds_only_configured_keys(monkeypatch):
203
+ built = []
204
+ monkeypatch.setattr(llm_clients, "get_anthropic_client", lambda k: built.append(("a", k)))
205
+ monkeypatch.setattr(llm_clients, "get_gemini_client", lambda k: built.append(("g", k)))
206
+
207
+ async def _noping():
208
+ return None
209
+
210
+ monkeypatch.setattr(llm_clients, "keepalive_ping", _noping)
211
+
212
+ cfg = SimpleNamespace(anthropic_api_key="k1", google_api_key="")
213
+ asyncio.run(llm_clients.prewarm(cfg))
214
+
215
+ assert built == [("a", "k1")], "only the configured anthropic key should build"
216
+
217
+
218
+ def test_prewarm_tolerates_raising_builder(monkeypatch):
219
+ def boom(_k):
220
+ raise RuntimeError("bad key")
221
+
222
+ monkeypatch.setattr(llm_clients, "get_anthropic_client", boom)
223
+ monkeypatch.setattr(llm_clients, "get_gemini_client", boom)
224
+
225
+ async def _noping():
226
+ return None
227
+
228
+ monkeypatch.setattr(llm_clients, "keepalive_ping", _noping)
229
+
230
+ cfg = SimpleNamespace(anthropic_api_key="k1", google_api_key="g1")
231
+ asyncio.run(llm_clients.prewarm(cfg)) # must not raise
232
+
233
+
234
+ # --------------------------------------------------------------------------
235
+ # close_all
236
+ # --------------------------------------------------------------------------
237
+
238
+ def test_close_all_closes_and_clears():
239
+ fc_a = FakeClient("a")
240
+ fc_g = FakeClient("g")
241
+ llm_clients._CLIENTS["anthropic"] = ("ka", fc_a)
242
+ llm_clients._CLIENTS["google"] = ("kg", fc_g)
243
+
244
+ asyncio.run(llm_clients.close_all())
245
+
246
+ assert fc_a.closed and fc_g.closed
247
+ assert llm_clients._CLIENTS == {}
248
+
249
+
250
+ def test_close_all_swallows_failing_close():
251
+ class BadClose:
252
+ async def aclose(self):
253
+ raise RuntimeError("close blew up")
254
+
255
+ llm_clients._CLIENTS["anthropic"] = ("k", BadClose())
256
+ asyncio.run(llm_clients.close_all()) # must swallow
257
+ assert llm_clients._CLIENTS == {}