ai-interview-assistant 2.2.0__tar.gz → 2.2.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/PKG-INFO +5 -1
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/pyproject.toml +8 -1
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/__init__.py +1 -1
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/ai_client.py +10 -10
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/config.py +32 -5
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/daemon.py +26 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/screens/dashboard.py +86 -34
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/i18n.py +2 -1
- ai_interview_assistant-2.2.2/src/ai_interview/llm_clients.py +225 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview_assistant.egg-info/PKG-INFO +5 -1
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview_assistant.egg-info/SOURCES.txt +3 -1
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview_assistant.egg-info/requires.txt +5 -0
- ai_interview_assistant-2.2.2/tests/test_llm_clients.py +257 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/README.md +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/setup.cfg +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/__main__.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/audio/__init__.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/audio/capture.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/audio/transcriber.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/buffer.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/cli.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/__init__.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/__main__.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/app.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/screens/__init__.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/screens/hotkeys.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/screens/scripts.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/screens/settings.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/hotkey_config.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/hotkeys.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/menubar.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/metrics.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/ollama_utils.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/overlay.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/screenshot.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/server/__init__.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/server/app.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/server/routes.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/server/websocket.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/state.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/utils.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/watchdog.py +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview_assistant.egg-info/dependency_links.txt +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview_assistant.egg-info/entry_points.txt +0 -0
- {ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview_assistant.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-interview-assistant
|
|
3
|
-
Version: 2.2.0
|
|
3
|
+
Version: 2.2.2
|
|
4
4
|
Summary: Ghost background AI assistant for live code challenges
|
|
5
5
|
Requires-Python: >=3.11
|
|
6
6
|
Requires-Dist: click>=8.0
|
|
@@ -27,5 +27,9 @@ Requires-Dist: simple-term-menu>=1.6.0
|
|
|
27
27
|
Requires-Dist: datadog>=0.49.0
|
|
28
28
|
Requires-Dist: psutil>=5.9
|
|
29
29
|
Requires-Dist: httpx>=0.27.0
|
|
30
|
+
Requires-Dist: h2>=4.0
|
|
30
31
|
Requires-Dist: flet>=0.25.0
|
|
31
32
|
Requires-Dist: PyPDF2>=3.0
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "ai-interview-assistant"
|
|
7
|
-
version = "2.2.0"
|
|
7
|
+
version = "2.2.2"
|
|
8
8
|
description = "Ghost background AI assistant for live code challenges"
|
|
9
9
|
requires-python = ">=3.11"
|
|
10
10
|
dependencies = [
|
|
@@ -32,10 +32,17 @@ dependencies = [
|
|
|
32
32
|
"datadog>=0.49.0",
|
|
33
33
|
"psutil>=5.9",
|
|
34
34
|
"httpx>=0.27.0",
|
|
35
|
+
"h2>=4.0",
|
|
35
36
|
"flet>=0.25.0",
|
|
36
37
|
"PyPDF2>=3.0",
|
|
37
38
|
]
|
|
38
39
|
|
|
40
|
+
[project.optional-dependencies]
|
|
41
|
+
dev = [
|
|
42
|
+
"pytest>=8.0",
|
|
43
|
+
"pytest-asyncio>=0.23",
|
|
44
|
+
]
|
|
45
|
+
|
|
39
46
|
[project.scripts]
|
|
40
47
|
ai-interview = "ai_interview.cli:cli"
|
|
41
48
|
|
|
@@ -139,16 +139,13 @@ async def _stream_claude(state: "AppState", config: "Config", messages: list, cu
|
|
|
139
139
|
"""Stream response from Claude API with retry on transient errors."""
|
|
140
140
|
from ai_interview.server.websocket import broadcast
|
|
141
141
|
from ai_interview.metrics import metrics
|
|
142
|
+
from ai_interview import llm_clients
|
|
142
143
|
import anthropic
|
|
143
144
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
default_headers={"Authorization": f"Bearer {config.anthropic_api_key}"},
|
|
149
|
-
)
|
|
150
|
-
else:
|
|
151
|
-
client = anthropic.AsyncAnthropic(api_key=config.anthropic_api_key)
|
|
145
|
+
# Shared, connection-pooled HTTP/2 client, reused across queries so query 2+
|
|
146
|
+
# skip the DNS+TCP+TLS handshake. OAuth (sk-ant-oat) handling lives in the
|
|
147
|
+
# builder. Do NOT close it here — it is shared and closed at daemon shutdown.
|
|
148
|
+
client = llm_clients.get_anthropic_client(config.anthropic_api_key)
|
|
152
149
|
|
|
153
150
|
full_text = ""
|
|
154
151
|
_t0 = time.monotonic()
|
|
@@ -262,10 +259,13 @@ async def _stream_gemini(state: "AppState", config: "Config", messages: list, cu
|
|
|
262
259
|
_system_prompt = system_prompt or config.system_prompt
|
|
263
260
|
|
|
264
261
|
try:
|
|
265
|
-
from google import genai
|
|
266
262
|
from google.genai import types
|
|
263
|
+
from ai_interview import llm_clients
|
|
267
264
|
|
|
268
|
-
client
|
|
265
|
+
# Shared client — reused across queries (connection reuse; google-genai
|
|
266
|
+
# accepts no custom httpx client, so no HTTP/2). Serialized query model +
|
|
267
|
+
# idle-gated keepalive mean it is never used concurrently.
|
|
268
|
+
client = llm_clients.get_gemini_client(config.google_api_key)
|
|
269
269
|
contents = _messages_to_gemini_contents(messages, _system_prompt)
|
|
270
270
|
|
|
271
271
|
# Run synchronous streaming in a thread to avoid blocking asyncio
|
|
@@ -188,6 +188,25 @@ def _read_cv_text(cv_path: str) -> str:
|
|
|
188
188
|
return ""
|
|
189
189
|
|
|
190
190
|
|
|
191
|
+
def _assemble_with_context(base_prompt: str, codebase: str, cv_text: str) -> str:
|
|
192
|
+
"""Append codebase and CV context blocks to a user-supplied base prompt."""
|
|
193
|
+
prompt = base_prompt
|
|
194
|
+
if codebase:
|
|
195
|
+
prompt += (
|
|
196
|
+
"\n\n---\nThe interview is about the following codebase. "
|
|
197
|
+
"Use it to answer questions about the code directly.\n\n"
|
|
198
|
+
+ codebase
|
|
199
|
+
)
|
|
200
|
+
if cv_text:
|
|
201
|
+
prompt += (
|
|
202
|
+
"\n\nCandidate's CV/Resume:\n"
|
|
203
|
+
f"{cv_text}\n\n"
|
|
204
|
+
"When answering behavioural or experience questions, reference real projects "
|
|
205
|
+
"and roles from this CV. Keep answers first-person as if the candidate is speaking.\n"
|
|
206
|
+
)
|
|
207
|
+
return prompt
|
|
208
|
+
|
|
209
|
+
|
|
191
210
|
def _build_system_prompt(
|
|
192
211
|
language: str = "",
|
|
193
212
|
challenge_type: str = "",
|
|
@@ -306,6 +325,7 @@ class Config:
|
|
|
306
325
|
whisper_host: str = "" # Remote faster-whisper-server URL (e.g. http://192.168.1.50:8000)
|
|
307
326
|
show_menubar_icon: bool = True
|
|
308
327
|
scripts: dict = field(default_factory=dict)
|
|
328
|
+
user_system_prompt: str = "" # user-edited base prompt from dashboard preview
|
|
309
329
|
system_prompt: str = field(default="")
|
|
310
330
|
|
|
311
331
|
@property
|
|
@@ -340,11 +360,17 @@ class Config:
|
|
|
340
360
|
if not self.system_prompt:
|
|
341
361
|
codebase = read_codebase_context(self.context_path) if self.context_path else ""
|
|
342
362
|
cv_text = _read_cv_text(self.cv_path)
|
|
343
|
-
self.
|
|
344
|
-
|
|
345
|
-
self.
|
|
346
|
-
|
|
347
|
-
|
|
363
|
+
if self.user_system_prompt:
|
|
364
|
+
# User customised the base prompt — use it and still inject codebase/CV
|
|
365
|
+
self.system_prompt = _assemble_with_context(
|
|
366
|
+
self.user_system_prompt, codebase, cv_text
|
|
367
|
+
)
|
|
368
|
+
else:
|
|
369
|
+
self.system_prompt = _build_system_prompt(
|
|
370
|
+
self.interview_language, self.challenge_type, codebase,
|
|
371
|
+
self.role_context, self.transcription_language, cv_text,
|
|
372
|
+
scripts=self.scripts,
|
|
373
|
+
)
|
|
348
374
|
|
|
349
375
|
@classmethod
|
|
350
376
|
def from_saved(cls, overrides: Optional[dict] = None) -> "Config":
|
|
@@ -392,4 +418,5 @@ class Config:
|
|
|
392
418
|
whisper_host=merged.get("whisper_host", ""),
|
|
393
419
|
show_menubar_icon=merged.get("show_menubar_icon", True),
|
|
394
420
|
scripts=merged.get("scripts", {}),
|
|
421
|
+
user_system_prompt=merged.get("user_system_prompt", ""),
|
|
395
422
|
)
|
|
@@ -279,10 +279,19 @@ def run_daemon(config: Config) -> None:
|
|
|
279
279
|
await site.start()
|
|
280
280
|
logger.info("Viewer server on 0.0.0.0:%d", config.port)
|
|
281
281
|
|
|
282
|
+
# Pre-warm pooled LLM connections (HTTP/2 + keep-alive) so the very first
|
|
283
|
+
# query skips the DNS+TCP+TLS handshake. Best-effort — never blocks start.
|
|
284
|
+
from ai_interview import llm_clients
|
|
285
|
+
try:
|
|
286
|
+
await llm_clients.prewarm(config)
|
|
287
|
+
except Exception as exc:
|
|
288
|
+
logger.info("LLM prewarm failed (best-effort): %s", exc)
|
|
289
|
+
|
|
282
290
|
_MAX_IDLE_S = 30 * 60 # 30 min idle → auto-shutdown
|
|
283
291
|
state.last_activity_at = time.time() # initialise so first check is fair
|
|
284
292
|
_last_heartbeat_uptime = -1
|
|
285
293
|
_last_cpu_broadcast = 0.0
|
|
294
|
+
_last_llm_keepalive = time.time() # prewarm just pinged; next ping in ~30s
|
|
286
295
|
|
|
287
296
|
# Prime psutil baseline — first call always returns 0.0, real values start from second call
|
|
288
297
|
try:
|
|
@@ -323,11 +332,28 @@ def run_daemon(config: Config) -> None:
|
|
|
323
332
|
except Exception:
|
|
324
333
|
pass
|
|
325
334
|
|
|
335
|
+
# Keep pooled LLM sockets warm (~30s cadence) so a query after a long
|
|
336
|
+
# idle gap doesn't pay a fresh handshake. Only ping while idle: an
|
|
337
|
+
# in-flight query already keeps the connection hot, and skipping then
|
|
338
|
+
# avoids touching the shared client concurrently. Fire-and-forget so
|
|
339
|
+
# the 0.5s health loop never blocks on the network.
|
|
340
|
+
if now - _last_llm_keepalive >= 30:
|
|
341
|
+
_last_llm_keepalive = now
|
|
342
|
+
_aq = state.active_query
|
|
343
|
+
if _aq is None or _aq.done():
|
|
344
|
+
asyncio.ensure_future(llm_clients.keepalive_ping())
|
|
345
|
+
|
|
326
346
|
# Auto-shutdown after 30 min of no hotkeys / queries / transcripts
|
|
327
347
|
if idle >= _MAX_IDLE_S:
|
|
328
348
|
logger.info("30-minute idle limit reached — shutting down automatically")
|
|
329
349
|
shutdown_flag["stop"] = True
|
|
330
350
|
|
|
351
|
+
# Close pooled LLM clients in the loop they were created in (best-effort).
|
|
352
|
+
try:
|
|
353
|
+
await llm_clients.close_all()
|
|
354
|
+
except Exception as exc:
|
|
355
|
+
logger.info("LLM client close failed (best-effort): %s", exc)
|
|
356
|
+
|
|
331
357
|
await runner.cleanup()
|
|
332
358
|
|
|
333
359
|
try:
|
|
@@ -151,15 +151,32 @@ class DashboardScreen(ft.Column):
|
|
|
151
151
|
|
|
152
152
|
# ---- Prompt preview ----
|
|
153
153
|
self._prompt_preview_expanded = False
|
|
154
|
-
self.
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
154
|
+
self._preview_manually_edited = False
|
|
155
|
+
self._prompt_preview = ft.TextField(
|
|
156
|
+
value="",
|
|
157
|
+
multiline=True,
|
|
158
|
+
min_lines=8,
|
|
159
|
+
max_lines=20,
|
|
160
|
+
text_size=11,
|
|
161
|
+
border_radius=8,
|
|
162
|
+
border_color="#2a2a2a",
|
|
163
|
+
bgcolor="#111111",
|
|
164
|
+
content_padding=ft.padding.all(12),
|
|
165
|
+
text_style=ft.TextStyle(italic=True, color=MUTED),
|
|
160
166
|
visible=False,
|
|
167
|
+
expand=True,
|
|
168
|
+
on_change=lambda _: setattr(self, "_preview_manually_edited", True),
|
|
161
169
|
)
|
|
170
|
+
self._preview_notes = ft.Text("", size=10, color=MUTED, italic=True, visible=False)
|
|
162
171
|
self._preview_chevron = ft.Text("▶", size=11, color=MUTED)
|
|
172
|
+
self._regenerate_btn = ft.IconButton(
|
|
173
|
+
icon=ft.Icons.REFRESH,
|
|
174
|
+
icon_size=14,
|
|
175
|
+
icon_color=MUTED,
|
|
176
|
+
tooltip=t("regenerate_preview"),
|
|
177
|
+
on_click=self._regenerate_preview,
|
|
178
|
+
visible=False,
|
|
179
|
+
)
|
|
163
180
|
self._update_prompt_preview()
|
|
164
181
|
|
|
165
182
|
self._lang_field.visible = challenge in ("code_challenge", "system_design")
|
|
@@ -218,19 +235,23 @@ class DashboardScreen(ft.Column):
|
|
|
218
235
|
transcription_card,
|
|
219
236
|
self._cv_check,
|
|
220
237
|
ft.Column([
|
|
221
|
-
ft.
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
238
|
+
ft.Row([
|
|
239
|
+
ft.TextButton(
|
|
240
|
+
content=ft.Row(
|
|
241
|
+
[
|
|
242
|
+
ft.Text(t("prompt_preview_label"), size=11, color=MUTED),
|
|
243
|
+
self._preview_chevron,
|
|
244
|
+
],
|
|
245
|
+
spacing=4,
|
|
246
|
+
tight=True,
|
|
247
|
+
),
|
|
248
|
+
on_click=self._toggle_prompt_preview,
|
|
249
|
+
style=ft.ButtonStyle(padding=ft.padding.all(0), overlay_color=ft.Colors.TRANSPARENT),
|
|
229
250
|
),
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
),
|
|
251
|
+
self._regenerate_btn,
|
|
252
|
+
], spacing=0, vertical_alignment=ft.CrossAxisAlignment.CENTER),
|
|
233
253
|
self._prompt_preview,
|
|
254
|
+
self._preview_notes,
|
|
234
255
|
], spacing=6),
|
|
235
256
|
ft.Row([self._start_btn, self._start_status], spacing=12),
|
|
236
257
|
], spacing=12)
|
|
@@ -333,6 +354,15 @@ class DashboardScreen(ft.Column):
|
|
|
333
354
|
self._cv_check.value = bool(cv_name)
|
|
334
355
|
self._cv_check.disabled = not bool(cv_name)
|
|
335
356
|
|
|
357
|
+
# Restore saved prompt if present, otherwise regenerate
|
|
358
|
+
saved_prompt = fresh.get("user_system_prompt", "")
|
|
359
|
+
if saved_prompt:
|
|
360
|
+
self._prompt_preview.value = saved_prompt
|
|
361
|
+
self._preview_manually_edited = True
|
|
362
|
+
else:
|
|
363
|
+
self._preview_manually_edited = False
|
|
364
|
+
self._update_prompt_preview()
|
|
365
|
+
|
|
336
366
|
self._update_model_options()
|
|
337
367
|
try:
|
|
338
368
|
self._page.update()
|
|
@@ -408,11 +438,24 @@ class DashboardScreen(ft.Column):
|
|
|
408
438
|
|
|
409
439
|
def _toggle_prompt_preview(self, _e=None):
|
|
410
440
|
self._prompt_preview_expanded = not self._prompt_preview_expanded
|
|
411
|
-
|
|
412
|
-
self.
|
|
441
|
+
expanded = self._prompt_preview_expanded
|
|
442
|
+
self._prompt_preview.visible = expanded
|
|
443
|
+
self._preview_notes.visible = expanded and bool(self._preview_notes.value)
|
|
444
|
+
self._regenerate_btn.visible = expanded
|
|
445
|
+
self._preview_chevron.value = "▼" if expanded else "▶"
|
|
413
446
|
try:
|
|
414
447
|
self._preview_chevron.update()
|
|
415
448
|
self._prompt_preview.update()
|
|
449
|
+
self._preview_notes.update()
|
|
450
|
+
self._regenerate_btn.update()
|
|
451
|
+
except Exception:
|
|
452
|
+
pass
|
|
453
|
+
|
|
454
|
+
def _regenerate_preview(self, _e=None):
|
|
455
|
+
self._preview_manually_edited = False
|
|
456
|
+
self._update_prompt_preview()
|
|
457
|
+
try:
|
|
458
|
+
self._page.update()
|
|
416
459
|
except Exception:
|
|
417
460
|
pass
|
|
418
461
|
|
|
@@ -420,33 +463,41 @@ class DashboardScreen(ft.Column):
|
|
|
420
463
|
"""Rebuild the system prompt preview from current field values."""
|
|
421
464
|
if not hasattr(self, "_cv_check") or not hasattr(self, "_prompt_preview"):
|
|
422
465
|
return
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
brief = self._brief_field.value or ""
|
|
466
|
+
|
|
467
|
+
# Rebuild notes regardless of manual edit state
|
|
426
468
|
context_path = (self._context_path.value or "").strip()
|
|
427
469
|
cv_selected = self._cv_check.value
|
|
470
|
+
notes = []
|
|
471
|
+
if context_path:
|
|
472
|
+
notes.append(f"+ codebase context will be injected from: {context_path}")
|
|
473
|
+
if cv_selected and self._config.get("cv_path"):
|
|
474
|
+
notes.append(f"+ CV will be injected: {os.path.basename(self._config['cv_path'])}")
|
|
475
|
+
self._preview_notes.value = "\n".join(notes)
|
|
476
|
+
self._preview_notes.visible = self._prompt_preview_expanded and bool(notes)
|
|
477
|
+
|
|
478
|
+
if self._preview_manually_edited:
|
|
479
|
+
# User has edited the prompt — don't overwrite, just refresh notes
|
|
480
|
+
if self._prompt_preview_expanded:
|
|
481
|
+
try:
|
|
482
|
+
self._preview_notes.update()
|
|
483
|
+
except Exception:
|
|
484
|
+
pass
|
|
485
|
+
return
|
|
428
486
|
|
|
429
487
|
scripts = {**DEFAULT_SCRIPTS, **self._config.get("scripts", {})}
|
|
430
488
|
prompt = _build_system_prompt(
|
|
431
|
-
language=
|
|
432
|
-
challenge_type=
|
|
489
|
+
language=self._lang_field.value or "",
|
|
490
|
+
challenge_type=self._challenge_dropdown.value or "code_challenge",
|
|
433
491
|
codebase_context="",
|
|
434
|
-
role_context=
|
|
492
|
+
role_context=self._brief_field.value or "",
|
|
435
493
|
scripts=scripts,
|
|
436
494
|
)
|
|
437
|
-
|
|
438
|
-
notes = []
|
|
439
|
-
if context_path:
|
|
440
|
-
notes.append(f"+ codebase context from: {context_path}")
|
|
441
|
-
if cv_selected and self._config.get("cv_path"):
|
|
442
|
-
notes.append(f"+ CV: {os.path.basename(self._config['cv_path'])}")
|
|
443
|
-
if notes:
|
|
444
|
-
prompt += "\n\n" + "\n".join(notes)
|
|
445
|
-
|
|
446
495
|
self._prompt_preview.value = prompt
|
|
496
|
+
|
|
447
497
|
if self._prompt_preview_expanded:
|
|
448
498
|
try:
|
|
449
499
|
self._prompt_preview.update()
|
|
500
|
+
self._preview_notes.update()
|
|
450
501
|
except Exception:
|
|
451
502
|
pass
|
|
452
503
|
|
|
@@ -540,6 +591,7 @@ class DashboardScreen(ft.Column):
|
|
|
540
591
|
config["challenge_type"] = self._challenge_dropdown.value or "code_challenge"
|
|
541
592
|
config["transcription_language"] = self._trans_lang.value or "auto"
|
|
542
593
|
config["context_path"] = (self._context_path.value or "").strip()
|
|
594
|
+
config["user_system_prompt"] = (self._prompt_preview.value or "").strip()
|
|
543
595
|
|
|
544
596
|
# Per-session CV toggle — don't permanently clear the path
|
|
545
597
|
if not self._cv_check.value:
|
|
@@ -155,7 +155,8 @@ _STRINGS = {
|
|
|
155
155
|
"type_press_hold": "Tap + Hold",
|
|
156
156
|
|
|
157
157
|
# Dashboard prompt preview
|
|
158
|
-
"prompt_preview_label": "What will be sent to the AI
|
|
158
|
+
"prompt_preview_label": "What will be sent to the AI:",
|
|
159
|
+
"regenerate_preview": "Regenerate from current selections",
|
|
159
160
|
|
|
160
161
|
# AI Scripts
|
|
161
162
|
"save": "Save",
|
|
@@ -0,0 +1,225 @@
|
|
|
1
|
+
"""Shared, long-lived LLM SDK clients (connection reuse + HTTP/2 + heartbeat).
|
|
2
|
+
|
|
3
|
+
Every AI query used to build a fresh SDK client (``ai_client._stream_claude`` /
|
|
4
|
+
``_stream_gemini``), paying a new DNS + TCP + TLS handshake on its first request.
|
|
5
|
+
On a high-latency link that handshake dominates time-to-first-token. Deepgram
|
|
6
|
+
never has this problem because it holds one persistent websocket for the whole
|
|
7
|
+
session.
|
|
8
|
+
|
|
9
|
+
This module mirrors that idea: one connection-pooled client per provider, reused
|
|
10
|
+
across every query in a daemon session. Query 1 pays the handshake; query 2+ skip
|
|
11
|
+
it. The Anthropic transport additionally uses HTTP/2 (multiplexing + the
|
|
12
|
+
connection stays open), and a periodic keep-alive heartbeat keeps the pooled
|
|
13
|
+
socket warm so it never idles out mid-session.
|
|
14
|
+
|
|
15
|
+
Design notes / invariants:
|
|
16
|
+
- TLS verification stays ON (this repo verifies via certifi; do NOT add
|
|
17
|
+
``verify=False`` here).
|
|
18
|
+
- No top-level imports of ``ai_interview.*`` or of audio libraries — keep this
|
|
19
|
+
importable both before and after the daemon fork. SDKs are imported lazily.
|
|
20
|
+
- The daemon runs a single asyncio loop and serializes queries (a new query
|
|
21
|
+
cancels the in-flight one), so a shared client is never used by two queries at
|
|
22
|
+
once. The caller gates ``keepalive_ping`` on "no active query".
|
|
23
|
+
"""
|
|
24
|
+
from __future__ import annotations
|
|
25
|
+
|
|
26
|
+
import inspect
|
|
27
|
+
import logging
|
|
28
|
+
|
|
29
|
+
logger = logging.getLogger(__name__)
|
|
30
|
+
|
|
31
|
+
# provider -> (api_key, client_object)
|
|
32
|
+
_CLIENTS: dict[str, tuple[str, object]] = {}
|
|
33
|
+
# Strong refs to in-flight rotation-close tasks, so a fire-and-forget close is
|
|
34
|
+
# not GC'd before it runs (and so close_all can drain them at shutdown).
|
|
35
|
+
_PENDING_CLOSES: set = set()
|
|
36
|
+
# Latched so the HTTP/2 status (on, or downgraded to 1.1) is logged exactly once.
|
|
37
|
+
_h2_unavailable_logged = False
|
|
38
|
+
_h2_enabled_logged = False
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def _httpx_client():
|
|
42
|
+
"""A connection-pooled async httpx client tuned to stay warm across the gaps
|
|
43
|
+
between queries. HTTP/2 multiplexes and keeps the connection open;
|
|
44
|
+
``keepalive_expiry`` outlives a typical idle gap.
|
|
45
|
+
|
|
46
|
+
HTTP/2 needs the optional ``h2`` package. If it is not installed, httpx raises
|
|
47
|
+
``ImportError`` when ``http2=True``; we degrade to HTTP/1.1 (still pooled +
|
|
48
|
+
keep-alive) instead of breaking queries.
|
|
49
|
+
"""
|
|
50
|
+
global _h2_unavailable_logged, _h2_enabled_logged
|
|
51
|
+
import httpx
|
|
52
|
+
|
|
53
|
+
limits = httpx.Limits(max_keepalive_connections=10, keepalive_expiry=300.0)
|
|
54
|
+
try:
|
|
55
|
+
client = httpx.AsyncClient(http2=True, limits=limits)
|
|
56
|
+
if not _h2_enabled_logged:
|
|
57
|
+
_h2_enabled_logged = True
|
|
58
|
+
logger.info("LLM transport: HTTP/2 enabled with connection reuse")
|
|
59
|
+
return client
|
|
60
|
+
except ImportError:
|
|
61
|
+
if not _h2_unavailable_logged:
|
|
62
|
+
_h2_unavailable_logged = True
|
|
63
|
+
logger.info(
|
|
64
|
+
"HTTP/2 unavailable (install 'h2' / httpx[http2]) — "
|
|
65
|
+
"using HTTP/1.1 with connection reuse"
|
|
66
|
+
)
|
|
67
|
+
return httpx.AsyncClient(http2=False, limits=limits)
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def _build_anthropic(api_key: str):
|
|
71
|
+
"""Build a pooled AsyncAnthropic, honoring the OAuth Bearer-token convention.
|
|
72
|
+
|
|
73
|
+
Mirrors ``ai_client._stream_claude``: an ``sk-ant-oat`` key is an OAuth token
|
|
74
|
+
sent via the Authorization header, not the api_key field.
|
|
75
|
+
"""
|
|
76
|
+
import anthropic
|
|
77
|
+
|
|
78
|
+
if api_key.startswith("sk-ant-oat"):
|
|
79
|
+
return anthropic.AsyncAnthropic(
|
|
80
|
+
api_key="placeholder",
|
|
81
|
+
default_headers={"Authorization": f"Bearer {api_key}"},
|
|
82
|
+
http_client=_httpx_client(),
|
|
83
|
+
)
|
|
84
|
+
return anthropic.AsyncAnthropic(api_key=api_key, http_client=_httpx_client())
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def _build_gemini(api_key: str):
|
|
88
|
+
"""Build a reused genai.Client.
|
|
89
|
+
|
|
90
|
+
google-genai does not accept a custom httpx client, so Gemini gets connection
|
|
91
|
+
reuse only (no HTTP/2). Reuse alone still skips the per-query handshake.
|
|
92
|
+
|
|
93
|
+
This is a SYNC client. It is only ever touched from one place at a time: the
|
|
94
|
+
daemon serializes queries, and ``keepalive_ping`` is gated by the caller on
|
|
95
|
+
"no active query", so the client is never used concurrently.
|
|
96
|
+
"""
|
|
97
|
+
from google import genai
|
|
98
|
+
|
|
99
|
+
return genai.Client(api_key=api_key)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
def _get(provider: str, api_key: str, builder):
|
|
103
|
+
cached = _CLIENTS.get(provider)
|
|
104
|
+
if cached is not None and cached[0] == api_key:
|
|
105
|
+
return cached[1]
|
|
106
|
+
if cached is not None:
|
|
107
|
+
# Key rotated — drop the stale client and close it without blocking.
|
|
108
|
+
_schedule_close(cached[1])
|
|
109
|
+
client = builder(api_key)
|
|
110
|
+
_CLIENTS[provider] = (api_key, client)
|
|
111
|
+
return client
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
def get_anthropic_client(api_key: str):
|
|
115
|
+
return _get("anthropic", api_key, _build_anthropic)
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def get_gemini_client(api_key: str):
|
|
119
|
+
return _get("google", api_key, _build_gemini)
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
async def _aclose(client) -> None:
|
|
123
|
+
"""Best-effort close of an SDK client or raw httpx client. Tries async
|
|
124
|
+
``aclose`` first, then ``close`` (awaiting it if it returns a coroutine)."""
|
|
125
|
+
try:
|
|
126
|
+
aclose = getattr(client, "aclose", None)
|
|
127
|
+
if aclose is not None:
|
|
128
|
+
await aclose()
|
|
129
|
+
return
|
|
130
|
+
close = getattr(client, "close", None)
|
|
131
|
+
if close is not None:
|
|
132
|
+
res = close()
|
|
133
|
+
if inspect.isawaitable(res):
|
|
134
|
+
await res
|
|
135
|
+
except Exception as exc:
|
|
136
|
+
logger.info("llm client close failed (best-effort): %s", exc)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _schedule_close(client) -> None:
|
|
140
|
+
"""Close a rotated-out client without blocking the caller. If an event loop is
|
|
141
|
+
running, schedule the async close; otherwise close synchronously."""
|
|
142
|
+
import asyncio
|
|
143
|
+
|
|
144
|
+
try:
|
|
145
|
+
loop = asyncio.get_running_loop()
|
|
146
|
+
except RuntimeError:
|
|
147
|
+
loop = None
|
|
148
|
+
if loop is not None:
|
|
149
|
+
task = loop.create_task(_aclose(client))
|
|
150
|
+
_PENDING_CLOSES.add(task)
|
|
151
|
+
task.add_done_callback(_PENDING_CLOSES.discard)
|
|
152
|
+
else:
|
|
153
|
+
# No running loop in this thread — close synchronously. Best-effort: a
|
|
154
|
+
# rotated-out client teardown must never break the caller's query path.
|
|
155
|
+
try:
|
|
156
|
+
asyncio.run(_aclose(client))
|
|
157
|
+
except Exception as exc:
|
|
158
|
+
logger.info("llm client sync close failed (best-effort): %s", exc)
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
async def close_all() -> None:
|
|
162
|
+
"""Close every cached client. Called at daemon shutdown. Best-effort."""
|
|
163
|
+
clients = [c for _, (_, c) in _CLIENTS.items()]
|
|
164
|
+
_CLIENTS.clear()
|
|
165
|
+
for client in clients:
|
|
166
|
+
await _aclose(client)
|
|
167
|
+
# Drain any in-flight rotation-close tasks so the loop does not tear down with
|
|
168
|
+
# a pending close (avoids the 3.12 "Task was destroyed" warning).
|
|
169
|
+
if _PENDING_CLOSES:
|
|
170
|
+
import asyncio
|
|
171
|
+
|
|
172
|
+
await asyncio.gather(*list(_PENDING_CLOSES), return_exceptions=True)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
# ---------------------------------------------------------------------------
|
|
176
|
+
# Keep-alive heartbeat
|
|
177
|
+
#
|
|
178
|
+
# httpx keeps an idle pooled socket only for `keepalive_expiry` (300s), and a
|
|
179
|
+
# provider/proxy/VPN often closes an idle keep-alive connection much sooner
|
|
180
|
+
# (~60-120s). Without traffic the socket goes cold mid-session and the next query
|
|
181
|
+
# pays a fresh handshake. Mirroring Deepgram's websocket heartbeat, while the
|
|
182
|
+
# daemon runs we send a cheap, no-token request (a model list) on each pooled
|
|
183
|
+
# client every ~30s so the socket never idles out. The daemon main loop drives the
|
|
184
|
+
# cadence and gates on "no active query".
|
|
185
|
+
# ---------------------------------------------------------------------------
|
|
186
|
+
|
|
187
|
+
async def _ping_client(provider: str, client) -> None:
|
|
188
|
+
"""One cheap, no-token request that keeps the pooled socket warm. Uses the
|
|
189
|
+
provider's model-list endpoint (a GET, no completion, no tokens billed)."""
|
|
190
|
+
if provider == "anthropic":
|
|
191
|
+
await client.models.list()
|
|
192
|
+
elif provider == "google":
|
|
193
|
+
import asyncio
|
|
194
|
+
|
|
195
|
+
# google-genai's Client is synchronous; touch it off the event loop.
|
|
196
|
+
await asyncio.to_thread(lambda: next(iter(client.models.list()), None))
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
async def keepalive_ping() -> None:
|
|
200
|
+
"""Touch every cached client's connection so the pooled TCP+TLS socket stays
|
|
201
|
+
warm. Best-effort: a failed ping (transient network, or a 401 on an OAuth
|
|
202
|
+
models endpoint) is logged at INFO and never raised — the connection is still
|
|
203
|
+
pooled, and a real query re-establishes it if needed."""
|
|
204
|
+
for provider, (_api_key, client) in list(_CLIENTS.items()):
|
|
205
|
+
try:
|
|
206
|
+
await _ping_client(provider, client)
|
|
207
|
+
except Exception as exc:
|
|
208
|
+
logger.info("llm keepalive ping failed for %s (best-effort): %s", provider, exc)
|
|
209
|
+
|
|
210
|
+
|
|
211
|
+
async def prewarm(config) -> None:
|
|
212
|
+
"""Build and warm the pooled client for every provider that has a configured
|
|
213
|
+
key, so the FIRST query of a session skips the handshake too. Called once at
|
|
214
|
+
daemon startup. Best-effort per provider (a missing SDK or bad key never
|
|
215
|
+
breaks the others)."""
|
|
216
|
+
for getter, key in (
|
|
217
|
+
(get_anthropic_client, getattr(config, "anthropic_api_key", "") or ""),
|
|
218
|
+
(get_gemini_client, getattr(config, "google_api_key", "") or ""),
|
|
219
|
+
):
|
|
220
|
+
if key:
|
|
221
|
+
try:
|
|
222
|
+
getter(key)
|
|
223
|
+
except Exception as exc:
|
|
224
|
+
logger.info("llm prewarm build failed (best-effort): %s", exc)
|
|
225
|
+
await keepalive_ping()
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-interview-assistant
|
|
3
|
-
Version: 2.2.0
|
|
3
|
+
Version: 2.2.2
|
|
4
4
|
Summary: Ghost background AI assistant for live code challenges
|
|
5
5
|
Requires-Python: >=3.11
|
|
6
6
|
Requires-Dist: click>=8.0
|
|
@@ -27,5 +27,9 @@ Requires-Dist: simple-term-menu>=1.6.0
|
|
|
27
27
|
Requires-Dist: datadog>=0.49.0
|
|
28
28
|
Requires-Dist: psutil>=5.9
|
|
29
29
|
Requires-Dist: httpx>=0.27.0
|
|
30
|
+
Requires-Dist: h2>=4.0
|
|
30
31
|
Requires-Dist: flet>=0.25.0
|
|
31
32
|
Requires-Dist: PyPDF2>=3.0
|
|
33
|
+
Provides-Extra: dev
|
|
34
|
+
Requires-Dist: pytest>=8.0; extra == "dev"
|
|
35
|
+
Requires-Dist: pytest-asyncio>=0.23; extra == "dev"
|
|
@@ -10,6 +10,7 @@ src/ai_interview/daemon.py
|
|
|
10
10
|
src/ai_interview/hotkey_config.py
|
|
11
11
|
src/ai_interview/hotkeys.py
|
|
12
12
|
src/ai_interview/i18n.py
|
|
13
|
+
src/ai_interview/llm_clients.py
|
|
13
14
|
src/ai_interview/menubar.py
|
|
14
15
|
src/ai_interview/metrics.py
|
|
15
16
|
src/ai_interview/ollama_utils.py
|
|
@@ -38,4 +39,5 @@ src/ai_interview_assistant.egg-info/SOURCES.txt
|
|
|
38
39
|
src/ai_interview_assistant.egg-info/dependency_links.txt
|
|
39
40
|
src/ai_interview_assistant.egg-info/entry_points.txt
|
|
40
41
|
src/ai_interview_assistant.egg-info/requires.txt
|
|
41
|
-
src/ai_interview_assistant.egg-info/top_level.txt
|
|
42
|
+
src/ai_interview_assistant.egg-info/top_level.txt
|
|
43
|
+
tests/test_llm_clients.py
|
|
@@ -16,6 +16,7 @@ simple-term-menu>=1.6.0
|
|
|
16
16
|
datadog>=0.49.0
|
|
17
17
|
psutil>=5.9
|
|
18
18
|
httpx>=0.27.0
|
|
19
|
+
h2>=4.0
|
|
19
20
|
flet>=0.25.0
|
|
20
21
|
PyPDF2>=3.0
|
|
21
22
|
|
|
@@ -26,3 +27,7 @@ pyobjc-framework-Quartz>=10.0
|
|
|
26
27
|
pyobjc-framework-ScreenCaptureKit>=10.0
|
|
27
28
|
pyobjc-framework-CoreMedia>=10.0
|
|
28
29
|
pyobjc-framework-libdispatch>=10.0
|
|
30
|
+
|
|
31
|
+
[dev]
|
|
32
|
+
pytest>=8.0
|
|
33
|
+
pytest-asyncio>=0.23
|
|
@@ -0,0 +1,257 @@
|
|
|
1
|
+
"""Unit tests for ai_interview.llm_clients (no network).
|
|
2
|
+
|
|
3
|
+
Covers the acceptance criteria in docs/perf/SPEC-llm-connection-reuse.md:
|
|
4
|
+
connection reuse, key-rotation close, OAuth header handling, HTTP/2 fallback,
|
|
5
|
+
and best-effort keepalive/prewarm/close_all.
|
|
6
|
+
"""
|
|
7
|
+
from __future__ import annotations
|
|
8
|
+
|
|
9
|
+
import asyncio
|
|
10
|
+
from types import SimpleNamespace
|
|
11
|
+
|
|
12
|
+
import pytest
|
|
13
|
+
|
|
14
|
+
from ai_interview import llm_clients
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@pytest.fixture(autouse=True)
|
|
18
|
+
def _reset_module_state():
|
|
19
|
+
llm_clients._CLIENTS.clear()
|
|
20
|
+
llm_clients._PENDING_CLOSES.clear()
|
|
21
|
+
llm_clients._h2_unavailable_logged = False
|
|
22
|
+
llm_clients._h2_enabled_logged = False
|
|
23
|
+
yield
|
|
24
|
+
llm_clients._CLIENTS.clear()
|
|
25
|
+
llm_clients._PENDING_CLOSES.clear()
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class FakeClient:
|
|
29
|
+
"""Stand-in SDK client with an async aclose() that records teardown."""
|
|
30
|
+
|
|
31
|
+
def __init__(self, key="k"):
|
|
32
|
+
self.key = key
|
|
33
|
+
self.closed = False
|
|
34
|
+
|
|
35
|
+
async def aclose(self):
|
|
36
|
+
self.closed = True
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
# --------------------------------------------------------------------------
|
|
40
|
+
# Reuse + key rotation
|
|
41
|
+
# --------------------------------------------------------------------------
|
|
42
|
+
|
|
43
|
+
def test_anthropic_client_reused_for_same_key(monkeypatch):
|
|
44
|
+
builds = []
|
|
45
|
+
|
|
46
|
+
def fake_build(key):
|
|
47
|
+
builds.append(key)
|
|
48
|
+
return FakeClient(key)
|
|
49
|
+
|
|
50
|
+
monkeypatch.setattr(llm_clients, "_build_anthropic", fake_build)
|
|
51
|
+
|
|
52
|
+
c1 = llm_clients.get_anthropic_client("k1")
|
|
53
|
+
c2 = llm_clients.get_anthropic_client("k1")
|
|
54
|
+
|
|
55
|
+
assert c1 is c2, "same key must reuse the cached client"
|
|
56
|
+
assert builds == ["k1"], "client must be built exactly once"
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
def test_gemini_client_reused_for_same_key(monkeypatch):
|
|
60
|
+
builds = []
|
|
61
|
+
monkeypatch.setattr(
|
|
62
|
+
llm_clients, "_build_gemini", lambda key: builds.append(key) or FakeClient(key)
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
c1 = llm_clients.get_gemini_client("g1")
|
|
66
|
+
c2 = llm_clients.get_gemini_client("g1")
|
|
67
|
+
|
|
68
|
+
assert c1 is c2
|
|
69
|
+
assert builds == ["g1"]
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def test_key_rotation_rebuilds_and_closes_stale(monkeypatch):
|
|
73
|
+
monkeypatch.setattr(llm_clients, "_build_anthropic", lambda key: FakeClient(key))
|
|
74
|
+
|
|
75
|
+
old = llm_clients.get_anthropic_client("k1")
|
|
76
|
+
new = llm_clients.get_anthropic_client("k2") # no running loop -> sync close
|
|
77
|
+
|
|
78
|
+
assert new is not old, "rotated key must build a fresh client"
|
|
79
|
+
assert old.closed is True, "stale client must be closed on rotation"
|
|
80
|
+
assert llm_clients._CLIENTS["anthropic"] == ("k2", new)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
# --------------------------------------------------------------------------
|
|
84
|
+
# OAuth header handling (must match _stream_claude's prior behavior)
|
|
85
|
+
# --------------------------------------------------------------------------
|
|
86
|
+
|
|
87
|
+
def test_build_anthropic_oauth_uses_bearer_header(monkeypatch):
|
|
88
|
+
import anthropic
|
|
89
|
+
|
|
90
|
+
captured = {}
|
|
91
|
+
|
|
92
|
+
class FakeAA:
|
|
93
|
+
def __init__(self, **kwargs):
|
|
94
|
+
captured.update(kwargs)
|
|
95
|
+
|
|
96
|
+
monkeypatch.setattr(anthropic, "AsyncAnthropic", FakeAA)
|
|
97
|
+
monkeypatch.setattr(llm_clients, "_httpx_client", lambda: "HTTPX")
|
|
98
|
+
|
|
99
|
+
llm_clients._build_anthropic("sk-ant-oat-secret")
|
|
100
|
+
|
|
101
|
+
assert captured["api_key"] == "placeholder"
|
|
102
|
+
assert captured["default_headers"]["Authorization"] == "Bearer sk-ant-oat-secret"
|
|
103
|
+
assert captured["http_client"] == "HTTPX"
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def test_build_anthropic_normal_key_no_bearer(monkeypatch):
|
|
107
|
+
import anthropic
|
|
108
|
+
|
|
109
|
+
captured = {}
|
|
110
|
+
|
|
111
|
+
class FakeAA:
|
|
112
|
+
def __init__(self, **kwargs):
|
|
113
|
+
captured.update(kwargs)
|
|
114
|
+
|
|
115
|
+
monkeypatch.setattr(anthropic, "AsyncAnthropic", FakeAA)
|
|
116
|
+
monkeypatch.setattr(llm_clients, "_httpx_client", lambda: "HTTPX")
|
|
117
|
+
|
|
118
|
+
llm_clients._build_anthropic("sk-ant-api03-real")
|
|
119
|
+
|
|
120
|
+
assert captured["api_key"] == "sk-ant-api03-real"
|
|
121
|
+
assert "default_headers" not in captured
|
|
122
|
+
assert captured["http_client"] == "HTTPX"
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
# --------------------------------------------------------------------------
|
|
126
|
+
# HTTP/2 with graceful HTTP/1.1 fallback
|
|
127
|
+
# --------------------------------------------------------------------------
|
|
128
|
+
|
|
129
|
+
def test_httpx_client_requests_http2(monkeypatch):
|
|
130
|
+
import httpx
|
|
131
|
+
|
|
132
|
+
calls = []
|
|
133
|
+
|
|
134
|
+
class FakeAsyncClient:
|
|
135
|
+
def __init__(self, **kwargs):
|
|
136
|
+
calls.append(kwargs)
|
|
137
|
+
|
|
138
|
+
monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient)
|
|
139
|
+
|
|
140
|
+
llm_clients._httpx_client()
|
|
141
|
+
|
|
142
|
+
assert calls[-1]["http2"] is True
|
|
143
|
+
assert calls[-1]["limits"].max_keepalive_connections == 10
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def test_httpx_client_falls_back_to_http1_when_h2_missing(monkeypatch):
|
|
147
|
+
import httpx
|
|
148
|
+
|
|
149
|
+
calls = []
|
|
150
|
+
|
|
151
|
+
class FakeAsyncClient:
|
|
152
|
+
def __init__(self, **kwargs):
|
|
153
|
+
if kwargs.get("http2"):
|
|
154
|
+
raise ImportError("Using http2=True, but the 'h2' package is not installed")
|
|
155
|
+
calls.append(kwargs)
|
|
156
|
+
|
|
157
|
+
monkeypatch.setattr(httpx, "AsyncClient", FakeAsyncClient)
|
|
158
|
+
|
|
159
|
+
llm_clients._httpx_client() # must not raise
|
|
160
|
+
|
|
161
|
+
assert calls[-1]["http2"] is False, "must fall back to HTTP/1.1 when h2 missing"
|
|
162
|
+
assert llm_clients._h2_unavailable_logged is True
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# --------------------------------------------------------------------------
|
|
166
|
+
# Keepalive — best-effort
|
|
167
|
+
# --------------------------------------------------------------------------
|
|
168
|
+
|
|
169
|
+
def test_keepalive_ping_empty_cache_is_noop():
|
|
170
|
+
asyncio.run(llm_clients.keepalive_ping()) # no clients -> must not raise
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def test_keepalive_ping_swallows_failing_client():
|
|
174
|
+
class Boom:
|
|
175
|
+
class models:
|
|
176
|
+
@staticmethod
|
|
177
|
+
async def list():
|
|
178
|
+
raise RuntimeError("network down")
|
|
179
|
+
|
|
180
|
+
llm_clients._CLIENTS["anthropic"] = ("k", Boom())
|
|
181
|
+
asyncio.run(llm_clients.keepalive_ping()) # must swallow, not raise
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
def test_keepalive_ping_calls_models_list():
|
|
185
|
+
hits = {"n": 0}
|
|
186
|
+
|
|
187
|
+
class Good:
|
|
188
|
+
class models:
|
|
189
|
+
@staticmethod
|
|
190
|
+
async def list():
|
|
191
|
+
hits["n"] += 1
|
|
192
|
+
|
|
193
|
+
llm_clients._CLIENTS["anthropic"] = ("k", Good())
|
|
194
|
+
asyncio.run(llm_clients.keepalive_ping())
|
|
195
|
+
assert hits["n"] == 1
|
|
196
|
+
|
|
197
|
+
|
|
198
|
+
# --------------------------------------------------------------------------
|
|
199
|
+
# prewarm — best-effort, only configured keys
|
|
200
|
+
# --------------------------------------------------------------------------
|
|
201
|
+
|
|
202
|
+
def test_prewarm_builds_only_configured_keys(monkeypatch):
|
|
203
|
+
built = []
|
|
204
|
+
monkeypatch.setattr(llm_clients, "get_anthropic_client", lambda k: built.append(("a", k)))
|
|
205
|
+
monkeypatch.setattr(llm_clients, "get_gemini_client", lambda k: built.append(("g", k)))
|
|
206
|
+
|
|
207
|
+
async def _noping():
|
|
208
|
+
return None
|
|
209
|
+
|
|
210
|
+
monkeypatch.setattr(llm_clients, "keepalive_ping", _noping)
|
|
211
|
+
|
|
212
|
+
cfg = SimpleNamespace(anthropic_api_key="k1", google_api_key="")
|
|
213
|
+
asyncio.run(llm_clients.prewarm(cfg))
|
|
214
|
+
|
|
215
|
+
assert built == [("a", "k1")], "only the configured anthropic key should build"
|
|
216
|
+
|
|
217
|
+
|
|
218
|
+
def test_prewarm_tolerates_raising_builder(monkeypatch):
|
|
219
|
+
def boom(_k):
|
|
220
|
+
raise RuntimeError("bad key")
|
|
221
|
+
|
|
222
|
+
monkeypatch.setattr(llm_clients, "get_anthropic_client", boom)
|
|
223
|
+
monkeypatch.setattr(llm_clients, "get_gemini_client", boom)
|
|
224
|
+
|
|
225
|
+
async def _noping():
|
|
226
|
+
return None
|
|
227
|
+
|
|
228
|
+
monkeypatch.setattr(llm_clients, "keepalive_ping", _noping)
|
|
229
|
+
|
|
230
|
+
cfg = SimpleNamespace(anthropic_api_key="k1", google_api_key="g1")
|
|
231
|
+
asyncio.run(llm_clients.prewarm(cfg)) # must not raise
|
|
232
|
+
|
|
233
|
+
|
|
234
|
+
# --------------------------------------------------------------------------
|
|
235
|
+
# close_all
|
|
236
|
+
# --------------------------------------------------------------------------
|
|
237
|
+
|
|
238
|
+
def test_close_all_closes_and_clears():
|
|
239
|
+
fc_a = FakeClient("a")
|
|
240
|
+
fc_g = FakeClient("g")
|
|
241
|
+
llm_clients._CLIENTS["anthropic"] = ("ka", fc_a)
|
|
242
|
+
llm_clients._CLIENTS["google"] = ("kg", fc_g)
|
|
243
|
+
|
|
244
|
+
asyncio.run(llm_clients.close_all())
|
|
245
|
+
|
|
246
|
+
assert fc_a.closed and fc_g.closed
|
|
247
|
+
assert llm_clients._CLIENTS == {}
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def test_close_all_swallows_failing_close():
|
|
251
|
+
class BadClose:
|
|
252
|
+
async def aclose(self):
|
|
253
|
+
raise RuntimeError("close blew up")
|
|
254
|
+
|
|
255
|
+
llm_clients._CLIENTS["anthropic"] = ("k", BadClose())
|
|
256
|
+
asyncio.run(llm_clients.close_all()) # must swallow
|
|
257
|
+
assert llm_clients._CLIENTS == {}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/audio/__init__.py
RENAMED
|
File without changes
|
{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/audio/capture.py
RENAMED
|
File without changes
|
{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/audio/transcriber.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/__init__.py
RENAMED
|
File without changes
|
{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/__main__.py
RENAMED
|
File without changes
|
{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/flet_gui/app.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/hotkey_config.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/ollama_utils.py
RENAMED
|
File without changes
|
|
File without changes
|
{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/screenshot.py
RENAMED
|
File without changes
|
{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/server/__init__.py
RENAMED
|
File without changes
|
{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/server/app.py
RENAMED
|
File without changes
|
{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/server/routes.py
RENAMED
|
File without changes
|
{ai_interview_assistant-2.2.0 → ai_interview_assistant-2.2.2}/src/ai_interview/server/websocket.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|