agent-cli 0.61.2__py3-none-any.whl → 0.70.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73) hide show
  1. agent_cli/_extras.json +13 -0
  2. agent_cli/_requirements/.gitkeep +0 -0
  3. agent_cli/_requirements/audio.txt +79 -0
  4. agent_cli/_requirements/faster-whisper.txt +215 -0
  5. agent_cli/_requirements/kokoro.txt +425 -0
  6. agent_cli/_requirements/llm.txt +183 -0
  7. agent_cli/_requirements/memory.txt +355 -0
  8. agent_cli/_requirements/mlx-whisper.txt +222 -0
  9. agent_cli/_requirements/piper.txt +176 -0
  10. agent_cli/_requirements/rag.txt +402 -0
  11. agent_cli/_requirements/server.txt +154 -0
  12. agent_cli/_requirements/speed.txt +77 -0
  13. agent_cli/_requirements/vad.txt +155 -0
  14. agent_cli/agents/assistant.py +3 -1
  15. agent_cli/agents/autocorrect.py +5 -2
  16. agent_cli/agents/chat.py +3 -1
  17. agent_cli/agents/memory/__init__.py +2 -1
  18. agent_cli/agents/memory/add.py +2 -0
  19. agent_cli/agents/memory/proxy.py +7 -12
  20. agent_cli/agents/rag_proxy.py +5 -10
  21. agent_cli/agents/speak.py +3 -1
  22. agent_cli/agents/transcribe.py +7 -2
  23. agent_cli/agents/transcribe_daemon.py +3 -1
  24. agent_cli/agents/voice_edit.py +3 -1
  25. agent_cli/cli.py +19 -3
  26. agent_cli/config_cmd.py +1 -0
  27. agent_cli/core/chroma.py +4 -4
  28. agent_cli/core/deps.py +177 -25
  29. agent_cli/core/openai_proxy.py +9 -4
  30. agent_cli/core/process.py +2 -2
  31. agent_cli/core/reranker.py +5 -4
  32. agent_cli/core/utils.py +5 -3
  33. agent_cli/core/vad.py +2 -1
  34. agent_cli/core/watch.py +8 -6
  35. agent_cli/dev/cli.py +31 -34
  36. agent_cli/dev/coding_agents/base.py +1 -2
  37. agent_cli/dev/skill/SKILL.md +141 -0
  38. agent_cli/dev/skill/examples.md +571 -0
  39. agent_cli/dev/worktree.py +53 -5
  40. agent_cli/docs_gen.py +12 -42
  41. agent_cli/install/__init__.py +1 -1
  42. agent_cli/install/extras.py +174 -0
  43. agent_cli/memory/__init__.py +1 -18
  44. agent_cli/memory/_files.py +4 -1
  45. agent_cli/memory/_indexer.py +3 -2
  46. agent_cli/memory/_ingest.py +6 -5
  47. agent_cli/memory/_retrieval.py +18 -8
  48. agent_cli/memory/_streaming.py +2 -2
  49. agent_cli/memory/api.py +1 -1
  50. agent_cli/memory/client.py +1 -1
  51. agent_cli/memory/engine.py +1 -1
  52. agent_cli/rag/__init__.py +0 -19
  53. agent_cli/rag/_indexer.py +3 -2
  54. agent_cli/rag/api.py +1 -0
  55. agent_cli/scripts/.runtime/.gitkeep +0 -0
  56. agent_cli/scripts/check_plugin_skill_sync.py +50 -0
  57. agent_cli/scripts/sync_extras.py +138 -0
  58. agent_cli/server/cli.py +26 -24
  59. agent_cli/server/common.py +3 -4
  60. agent_cli/server/tts/api.py +1 -1
  61. agent_cli/server/whisper/backends/faster_whisper.py +30 -23
  62. agent_cli/server/whisper/wyoming_handler.py +22 -27
  63. agent_cli/services/_wyoming_utils.py +4 -2
  64. agent_cli/services/asr.py +13 -3
  65. agent_cli/services/llm.py +2 -1
  66. agent_cli/services/tts.py +5 -2
  67. agent_cli/services/wake_word.py +6 -3
  68. {agent_cli-0.61.2.dist-info → agent_cli-0.70.2.dist-info}/METADATA +168 -73
  69. {agent_cli-0.61.2.dist-info → agent_cli-0.70.2.dist-info}/RECORD +72 -54
  70. {agent_cli-0.61.2.dist-info → agent_cli-0.70.2.dist-info}/WHEEL +1 -2
  71. agent_cli-0.61.2.dist-info/top_level.txt +0 -1
  72. {agent_cli-0.61.2.dist-info → agent_cli-0.70.2.dist-info}/entry_points.txt +0 -0
  73. {agent_cli-0.61.2.dist-info → agent_cli-0.70.2.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,50 @@
1
+ #!/usr/bin/env python3
2
+ """Check that plugin skill files are in sync with source files."""
3
+
4
+ import sys
5
+ from pathlib import Path
6
+
7
+ SYNC_PAIRS = [
8
+ # Plugin marketplace distribution
9
+ ("agent_cli/dev/skill/SKILL.md", ".claude-plugin/skills/agent-cli-dev/SKILL.md"),
10
+ ("agent_cli/dev/skill/examples.md", ".claude-plugin/skills/agent-cli-dev/examples.md"),
11
+ # Project-local skill (for Claude Code working on this repo)
12
+ ("agent_cli/dev/skill/SKILL.md", ".claude/skills/agent-cli-dev/SKILL.md"),
13
+ ("agent_cli/dev/skill/examples.md", ".claude/skills/agent-cli-dev/examples.md"),
14
+ ]
15
+
16
+
17
def main() -> int:
    """Check that every plugin skill copy matches its source file.

    Each ``(source, target)`` pair in ``SYNC_PAIRS`` is resolved against the
    parent of the directory containing this script and compared as UTF-8 text.

    Returns:
        0 when every pair is in sync; 1 when a source or target file is
        missing or the contents of a pair differ.

    """
    root = Path(__file__).parent.parent
    out_of_sync: list[tuple[str, str, str]] = []

    for source, target in SYNC_PAIRS:
        source_path = root / source
        target_path = root / target

        if not source_path.exists():
            # Without the source there is nothing to compare against, so the
            # check must fail instead of silently passing.
            out_of_sync.append((source, target, "source missing"))
            continue

        if not target_path.exists():
            out_of_sync.append((source, target, "target missing"))
            continue

        # Decode explicitly as UTF-8 so the comparison does not depend on the
        # locale's default encoding.
        source_text = source_path.read_text(encoding="utf-8")
        if source_text != target_path.read_text(encoding="utf-8"):
            out_of_sync.append((source, target, "content differs"))

    if not out_of_sync:
        return 0

    print("Plugin skill files are out of sync:")
    for source, target, reason in out_of_sync:
        print(f"  {source} -> {target} ({reason})")
    print("\nRun:")
    print("  cp agent_cli/dev/skill/*.md .claude-plugin/skills/agent-cli-dev/")
    print("  cp agent_cli/dev/skill/*.md .claude/skills/agent-cli-dev/")
    return 1
47
+
48
+
49
+ if __name__ == "__main__":
50
+ sys.exit(main())
@@ -0,0 +1,138 @@
1
+ #!/usr/bin/env python3
2
+ """Generate _extras.json from pyproject.toml.
3
+
4
+ This script parses the optional-dependencies in pyproject.toml and generates
5
+ the agent_cli/_extras.json file with package-to-import mappings.
6
+
7
+ Usage:
8
+ python scripts/sync_extras.py
9
+ """
10
+
11
+ from __future__ import annotations
12
+
13
+ import json
14
+ import re
15
+ import sys
16
+ import tomllib
17
+ from pathlib import Path
18
+
19
+ REPO_ROOT = Path(__file__).parent.parent
20
+ PYPROJECT = REPO_ROOT / "pyproject.toml"
21
+ EXTRAS_FILE = REPO_ROOT / "agent_cli" / "_extras.json"
22
+
23
+ # Extras to skip (dev/test dependencies, not runtime installable)
24
+ SKIP_EXTRAS = {"dev", "test"}
25
+
26
+ # Manual mapping of extra name -> (description, list of import names)
27
+ # Import names should be the Python module name (how you import it)
28
+ # Bundle extras (voice, cloud, full) have empty import lists since they just install other extras
29
+ EXTRA_METADATA: dict[str, tuple[str, list[str]]] = {
30
+ # Provider extras (base dependencies now optional)
31
+ "audio": ("Audio recording/playback", ["sounddevice"]),
32
+ "wyoming": ("Wyoming protocol support", ["wyoming"]),
33
+ "openai": ("OpenAI API provider", ["openai"]),
34
+ "gemini": ("Google Gemini provider", ["google.genai"]),
35
+ "llm": ("LLM framework (pydantic-ai)", ["pydantic_ai"]),
36
+ # Feature extras
37
+ "rag": ("RAG proxy (ChromaDB, embeddings)", ["chromadb"]),
38
+ "memory": ("Long-term memory proxy", ["chromadb", "yaml"]),
39
+ "vad": ("Voice Activity Detection (silero-vad)", ["silero_vad"]),
40
+ "whisper": ("Local Whisper ASR (faster-whisper)", ["faster_whisper"]),
41
+ "whisper-mlx": ("MLX Whisper for Apple Silicon", ["mlx_whisper"]),
42
+ "tts": ("Local Piper TTS", ["piper"]),
43
+ "tts-kokoro": ("Kokoro neural TTS", ["kokoro"]),
44
+ "server": ("FastAPI server components", ["fastapi"]),
45
+ "speed": ("Audio speed adjustment (audiostretchy)", ["audiostretchy"]),
46
+ }
47
+
48
+
49
def get_extras_from_pyproject() -> set[str]:
    """Return the optional-dependency group names declared in pyproject.toml.

    Names listed in ``SKIP_EXTRAS`` are left out of the result.
    """
    with PYPROJECT.open("rb") as fh:
        pyproject = tomllib.load(fh)

    # Either table may be absent; fall back to an empty mapping in that case.
    project_table = pyproject.get("project", {})
    optional_deps = project_table.get("optional-dependencies", {})
    return {name for name in optional_deps if name not in SKIP_EXTRAS}
55
+
56
+
57
def extract_package_name(dep: str) -> str:
    """Extract the package name from a dependency specification.

    The leading PEP 508 name is taken, so extras, version specifiers
    (including the parenthesised form), environment markers and direct
    ``name @ url`` references are all dropped.

    Examples:
        "chromadb>=0.4.22" -> "chromadb"
        "pydantic-ai-slim[openai,duckduckgo]" -> "pydantic-ai-slim"
        'mlx-whisper>=0.4.0; sys_platform == "darwin"' -> "mlx-whisper"
        "pkg @ https://example.com/pkg-1.0.whl" -> "pkg"
        "pkg (>=1.0)" -> "pkg"

    """
    # PEP 508 names start and end with a letter or digit and may contain
    # ".", "_" and "-" in between.
    name_match = re.match(r"\s*([A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?)", dep)
    if name_match is not None:
        return name_match.group(1)

    # Input that does not start with a valid name: fall back to stripping
    # markers (;...), extras ([...]) and version specifiers.
    dep = re.split(r"[;\[]", dep)[0]
    dep = re.split(r"[<>=!~]", dep)[0]
    return dep.strip()
71
+
72
+
73
def package_to_import_name(package: str) -> str:
    """Map a distribution name to the module name used to import it.

    Examples:
        "google-genai" -> "google.genai"
        "pydantic-ai-slim" -> "pydantic_ai"
        "silero-vad" -> "silero_vad"
        "faster-whisper" -> "faster_whisper"

    """
    # Explicit overrides, consulted before the generic rule below.
    overrides = {
        "google-genai": "google.genai",
        "pydantic-ai-slim": "pydantic_ai",
        "silero-vad": "silero_vad",
        "faster-whisper": "faster_whisper",
        "mlx-whisper": "mlx_whisper",
        "piper-tts": "piper",
        "huggingface-hub": "huggingface_hub",
        "fastapi": "fastapi",
        "audiostretchy": "audiostretchy",
    }
    # Generic rule: hyphens become underscores.
    fallback = package.replace("-", "_")
    return overrides.get(package, fallback)
99
+
100
+
101
def generate_extras_json(extras: set[str]) -> dict[str, list]:
    """Build the mapping that is serialized into _extras.json.

    Keys are the extra names in sorted order; each value is a two-item list of
    ``[description, import names]``. Extras without an ``EXTRA_METADATA``
    entry get a TODO placeholder description and an empty import list.
    """

    def _entry(name: str) -> list:
        # Unknown extras get a fresh placeholder each time.
        if name not in EXTRA_METADATA:
            return ["TODO: add description", []]
        description, modules = EXTRA_METADATA[name]
        return [description, modules]

    return {name: _entry(name) for name in sorted(extras)}
112
+
113
+
114
def check_missing_metadata(extras: set[str]) -> list[str]:
    """Return the extras that have no entry in ``EXTRA_METADATA``.

    The result is sorted so that any message built from it is identical
    between runs; iterating the set directly yields an arbitrary order.

    """
    return sorted(extra for extra in extras if extra not in EXTRA_METADATA)
117
+
118
+
119
def main() -> int:
    """Regenerate _extras.json from the extras declared in pyproject.toml.

    Prints a warning for extras that lack metadata, writes the JSON file and
    always returns 0.
    """
    extras = get_extras_from_pyproject()

    # Extras without metadata are still written (as placeholders); just warn.
    if missing := check_missing_metadata(extras):
        print(f"Warning: The following extras need metadata in EXTRA_METADATA: {missing}")
        print("Please update EXTRA_METADATA in scripts/sync_extras.py")

    # Serialize with a trailing newline and write the file.
    serialized = json.dumps(generate_extras_json(extras), indent=2)
    EXTRAS_FILE.write_text(serialized + "\n")
    print(f"Generated {EXTRAS_FILE}")

    return 0
135
+
136
+
137
+ if __name__ == "__main__":
138
+ sys.exit(main())
agent_cli/server/cli.py CHANGED
@@ -9,23 +9,22 @@ from pathlib import Path # noqa: TC003 - Typer needs this at runtime
9
9
  from typing import Annotated
10
10
 
11
11
  import typer
12
- from rich.console import Console
13
12
 
14
13
  from agent_cli.cli import app as main_app
14
+ from agent_cli.core.deps import requires_extras
15
15
  from agent_cli.core.process import set_process_title
16
+ from agent_cli.core.utils import console, err_console
16
17
  from agent_cli.server.common import setup_rich_logging
17
18
 
18
- console = Console()
19
- err_console = Console(stderr=True)
20
19
  logger = logging.getLogger(__name__)
21
20
 
22
- # Check for optional dependencies
23
- HAS_UVICORN = find_spec("uvicorn") is not None
24
- HAS_FASTAPI = find_spec("fastapi") is not None
25
- HAS_FASTER_WHISPER = find_spec("faster_whisper") is not None
26
- HAS_MLX_WHISPER = find_spec("mlx_whisper") is not None
27
- HAS_PIPER = find_spec("piper") is not None
28
- HAS_KOKORO = find_spec("kokoro") is not None
21
+ # Check for optional dependencies at call time (not module load time)
22
+ # This is important because auto-install may install packages after the module is loaded
23
+
24
+
25
+ def _has(package: str) -> bool:
26
+ return find_spec(package) is not None
27
+
29
28
 
30
29
  app = typer.Typer(
31
30
  name="server",
@@ -34,7 +33,7 @@ app = typer.Typer(
34
33
  rich_markup_mode="markdown",
35
34
  no_args_is_help=True,
36
35
  )
37
- main_app.add_typer(app, name="server")
36
+ main_app.add_typer(app, name="server", rich_help_panel="Servers")
38
37
 
39
38
 
40
39
  @app.callback()
@@ -47,7 +46,7 @@ def server_callback(ctx: typer.Context) -> None:
47
46
 
48
47
  def _check_server_deps() -> None:
49
48
  """Check that server dependencies are available."""
50
- if not HAS_UVICORN or not HAS_FASTAPI:
49
+ if not _has("uvicorn") or not _has("fastapi"):
51
50
  err_console.print(
52
51
  "[bold red]Error:[/bold red] Server dependencies not installed. "
53
52
  "Run: [cyan]pip install agent-cli\\[server][/cyan] "
@@ -61,7 +60,7 @@ def _check_tts_deps(backend: str = "auto") -> None:
61
60
  _check_server_deps()
62
61
 
63
62
  if backend == "kokoro":
64
- if not HAS_KOKORO:
63
+ if not _has("kokoro"):
65
64
  err_console.print(
66
65
  "[bold red]Error:[/bold red] Kokoro backend requires kokoro. "
67
66
  "Run: [cyan]pip install agent-cli\\[tts-kokoro][/cyan] "
@@ -71,7 +70,7 @@ def _check_tts_deps(backend: str = "auto") -> None:
71
70
  return
72
71
 
73
72
  if backend == "piper":
74
- if not HAS_PIPER:
73
+ if not _has("piper"):
75
74
  err_console.print(
76
75
  "[bold red]Error:[/bold red] Piper backend requires piper-tts. "
77
76
  "Run: [cyan]pip install agent-cli\\[tts][/cyan] "
@@ -81,7 +80,7 @@ def _check_tts_deps(backend: str = "auto") -> None:
81
80
  return
82
81
 
83
82
  # For auto, check if either is available
84
- if not HAS_PIPER and not HAS_KOKORO:
83
+ if not _has("piper") and not _has("kokoro"):
85
84
  err_console.print(
86
85
  "[bold red]Error:[/bold red] No TTS backend available. "
87
86
  "Run: [cyan]pip install agent-cli\\[tts][/cyan] for Piper "
@@ -135,7 +134,7 @@ def _check_whisper_deps(backend: str, *, download_only: bool = False) -> None:
135
134
  """Check that Whisper dependencies are available."""
136
135
  _check_server_deps()
137
136
  if download_only:
138
- if not HAS_FASTER_WHISPER:
137
+ if not _has("faster_whisper"):
139
138
  err_console.print(
140
139
  "[bold red]Error:[/bold red] faster-whisper is required for --download-only. "
141
140
  "Run: [cyan]pip install agent-cli\\[whisper][/cyan] "
@@ -145,7 +144,7 @@ def _check_whisper_deps(backend: str, *, download_only: bool = False) -> None:
145
144
  return
146
145
 
147
146
  if backend == "mlx":
148
- if not HAS_MLX_WHISPER:
147
+ if not _has("mlx_whisper"):
149
148
  err_console.print(
150
149
  "[bold red]Error:[/bold red] MLX Whisper backend requires mlx-whisper. "
151
150
  "Run: [cyan]pip install mlx-whisper[/cyan]",
@@ -153,7 +152,7 @@ def _check_whisper_deps(backend: str, *, download_only: bool = False) -> None:
153
152
  raise typer.Exit(1)
154
153
  return
155
154
 
156
- if not HAS_FASTER_WHISPER:
155
+ if not _has("faster_whisper"):
157
156
  err_console.print(
158
157
  "[bold red]Error:[/bold red] Whisper dependencies not installed. "
159
158
  "Run: [cyan]pip install agent-cli\\[whisper][/cyan] "
@@ -163,6 +162,7 @@ def _check_whisper_deps(backend: str, *, download_only: bool = False) -> None:
163
162
 
164
163
 
165
164
  @app.command("whisper")
165
+ @requires_extras("server", "faster-whisper|mlx-whisper")
166
166
  def whisper_cmd( # noqa: PLR0912, PLR0915
167
167
  model: Annotated[
168
168
  list[str] | None,
@@ -293,7 +293,7 @@ def whisper_cmd( # noqa: PLR0912, PLR0915
293
293
 
294
294
  """
295
295
  # Setup Rich logging for consistent output
296
- setup_rich_logging(log_level, console=console)
296
+ setup_rich_logging(log_level)
297
297
 
298
298
  valid_backends = ("auto", "faster-whisper", "mlx")
299
299
  if backend not in valid_backends:
@@ -421,8 +421,9 @@ def whisper_cmd( # noqa: PLR0912, PLR0915
421
421
  )
422
422
 
423
423
 
424
- @app.command("transcription-proxy")
425
- def transcription_proxy_cmd(
424
+ @app.command("transcribe-proxy")
425
+ @requires_extras("server", "audio", "llm")
426
+ def transcribe_proxy_cmd(
426
427
  host: Annotated[
427
428
  str,
428
429
  typer.Option("--host", help="Host to bind the server to"),
@@ -449,10 +450,10 @@ def transcription_proxy_cmd(
449
450
 
450
451
  Examples:
451
452
  # Run on default port
452
- agent-cli server transcription-proxy
453
+ agent-cli server transcribe-proxy
453
454
 
454
455
  # Run on custom port
455
- agent-cli server transcription-proxy --port 8080
456
+ agent-cli server transcribe-proxy --port 8080
456
457
 
457
458
  """
458
459
  _check_server_deps()
@@ -475,6 +476,7 @@ def transcription_proxy_cmd(
475
476
 
476
477
 
477
478
  @app.command("tts")
479
+ @requires_extras("server", "piper|kokoro")
478
480
  def tts_cmd( # noqa: PLR0915
479
481
  model: Annotated[
480
482
  list[str] | None,
@@ -610,7 +612,7 @@ def tts_cmd( # noqa: PLR0915
610
612
 
611
613
  """
612
614
  # Setup Rich logging for consistent output
613
- setup_rich_logging(log_level, console=console)
615
+ setup_rich_logging(log_level)
614
616
 
615
617
  valid_backends = ("auto", "piper", "kokoro")
616
618
  if backend not in valid_backends:
@@ -9,10 +9,10 @@ import logging
9
9
  from contextlib import asynccontextmanager
10
10
  from typing import TYPE_CHECKING, Any, Protocol
11
11
 
12
- from rich.console import Console
13
12
  from rich.logging import RichHandler
14
13
 
15
14
  from agent_cli import constants
15
+ from agent_cli.core.utils import console
16
16
 
17
17
  if TYPE_CHECKING:
18
18
  import wave
@@ -128,7 +128,7 @@ def configure_app(app: FastAPI) -> None:
128
128
  return await log_requests_middleware(request, call_next)
129
129
 
130
130
 
131
- def setup_rich_logging(log_level: str = "info", *, console: Console | None = None) -> None:
131
+ def setup_rich_logging(log_level: str = "info") -> None:
132
132
  """Configure logging to use Rich for consistent, pretty output.
133
133
 
134
134
  This configures:
@@ -141,11 +141,10 @@ def setup_rich_logging(log_level: str = "info", *, console: Console | None = Non
141
141
 
142
142
  """
143
143
  level = getattr(logging, log_level.upper(), logging.INFO)
144
- rich_console = console or Console()
145
144
 
146
145
  # Create Rich handler with clean format
147
146
  handler = RichHandler(
148
- console=rich_console,
147
+ console=console,
149
148
  show_time=True,
150
149
  show_level=True,
151
150
  show_path=False, # Don't show file:line - too verbose
@@ -109,7 +109,7 @@ class SpeechRequest(BaseModel):
109
109
  input: str
110
110
  model: str = "tts-1"
111
111
  voice: str = "alloy"
112
- response_format: Literal["wav", "pcm", "mp3"] = "pcm"
112
+ response_format: Literal["mp3", "wav", "pcm"] = "mp3"
113
113
  speed: float = 1.0
114
114
  stream_format: Literal["audio"] | None = None
115
115
 
@@ -6,6 +6,7 @@ import asyncio
6
6
  import logging
7
7
  import tempfile
8
8
  from concurrent.futures import ProcessPoolExecutor
9
+ from dataclasses import dataclass
9
10
  from multiprocessing import get_context
10
11
  from pathlib import Path
11
12
  from typing import Any, Literal
@@ -19,6 +20,24 @@ from agent_cli.server.whisper.backends.base import (
19
20
  logger = logging.getLogger(__name__)
20
21
 
21
22
 
23
+ # --- Subprocess state (only used within subprocess worker) ---
24
+ # This state persists across function calls within the subprocess because:
25
+ # 1. Model loading is expensive and must be reused across transcription calls
26
+ # 2. CTranslate2 models cannot be pickled/passed through IPC queues
27
+ # 3. The subprocess is long-lived (ProcessPoolExecutor reuses workers)
28
+
29
+
30
+ @dataclass
31
+ class _SubprocessState:
32
+ """Container for subprocess-local state. Not shared with main process."""
33
+
34
+ model: Any = None
35
+ device: str | None = None
36
+
37
+
38
+ _state = _SubprocessState()
39
+
40
+
22
41
  # --- Subprocess worker functions (run in isolated process) ---
23
42
 
24
43
 
@@ -40,28 +59,22 @@ def _load_model_in_subprocess(
40
59
  cpu_threads=cpu_threads,
41
60
  download_root=download_root,
42
61
  )
43
- return str(model.model.device)
62
+
63
+ # Store in subprocess state for reuse across transcription calls
64
+ _state.model = model
65
+ _state.device = str(model.model.device)
66
+
67
+ return _state.device
44
68
 
45
69
 
46
70
  def _transcribe_in_subprocess(
47
- model_name: str,
48
- device: str,
49
- compute_type: str,
50
- cpu_threads: int,
51
- download_root: str | None,
52
71
  audio_bytes: bytes,
53
72
  kwargs: dict[str, Any],
54
73
  ) -> dict[str, Any]:
55
- """Run transcription in subprocess. Model is loaded fresh each call."""
56
- from faster_whisper import WhisperModel # noqa: PLC0415
57
-
58
- model = WhisperModel(
59
- model_name,
60
- device=device,
61
- compute_type=compute_type,
62
- cpu_threads=cpu_threads,
63
- download_root=download_root,
64
- )
74
+ """Run transcription in subprocess. Reuses model from _state."""
75
+ if _state.model is None:
76
+ msg = "Model not loaded in subprocess. Call _load_model_in_subprocess first."
77
+ raise RuntimeError(msg)
65
78
 
66
79
  # Write audio to temp file - faster-whisper needs a file path
67
80
  with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
@@ -69,7 +82,7 @@ def _transcribe_in_subprocess(
69
82
  tmp_path = tmp.name
70
83
 
71
84
  try:
72
- segments, info = model.transcribe(tmp_path, **kwargs)
85
+ segments, info = _state.model.transcribe(tmp_path, **kwargs)
73
86
  segment_list = list(segments) # Consume lazy generator
74
87
  finally:
75
88
  Path(tmp_path).unlink(missing_ok=True)
@@ -195,16 +208,10 @@ class FasterWhisperBackend:
195
208
  "word_timestamps": word_timestamps,
196
209
  }
197
210
 
198
- download_root = str(self._config.cache_dir) if self._config.cache_dir else None
199
211
  loop = asyncio.get_running_loop()
200
212
  result = await loop.run_in_executor(
201
213
  self._executor,
202
214
  _transcribe_in_subprocess,
203
- self._config.model_name,
204
- self._config.device,
205
- self._config.compute_type,
206
- self._config.cpu_threads,
207
- download_root,
208
215
  audio,
209
216
  kwargs,
210
217
  )
@@ -2,19 +2,18 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
- import io
6
5
  import logging
7
- import wave
8
6
  from functools import partial
9
7
  from typing import TYPE_CHECKING
10
8
 
11
9
  from wyoming.asr import Transcribe, Transcript
12
- from wyoming.audio import AudioChunk, AudioStop
10
+ from wyoming.audio import AudioChunk, AudioChunkConverter, AudioStop
13
11
  from wyoming.info import AsrModel, AsrProgram, Attribution, Describe, Info
14
12
  from wyoming.server import AsyncEventHandler, AsyncServer
15
13
 
16
- from agent_cli.server.common import setup_wav_file
14
+ from agent_cli import constants
17
15
  from agent_cli.server.whisper.languages import WHISPER_LANGUAGE_CODES
16
+ from agent_cli.services import pcm_to_wav
18
17
 
19
18
  if TYPE_CHECKING:
20
19
  from wyoming.event import Event
@@ -49,8 +48,12 @@ class WyomingWhisperHandler(AsyncEventHandler):
49
48
  """
50
49
  super().__init__(*args, **kwargs)
51
50
  self._registry = registry
52
- self._audio_buffer: io.BytesIO | None = None
53
- self._wav_file: wave.Wave_write | None = None
51
+ self._audio_bytes: bytes = b""
52
+ self._audio_converter = AudioChunkConverter(
53
+ rate=constants.AUDIO_RATE,
54
+ width=constants.AUDIO_FORMAT_WIDTH,
55
+ channels=constants.AUDIO_CHANNELS,
56
+ )
54
57
  self._language: str | None = None
55
58
  self._initial_prompt: str | None = None
56
59
 
@@ -80,39 +83,31 @@ class WyomingWhisperHandler(AsyncEventHandler):
80
83
 
81
84
  async def _handle_audio_chunk(self, event: Event) -> bool:
82
85
  """Handle an audio chunk event."""
83
- chunk = AudioChunk.from_event(event)
84
-
85
- if self._wav_file is None:
86
+ if not self._audio_bytes:
86
87
  logger.debug("AudioChunk begin")
87
- self._audio_buffer = io.BytesIO()
88
- self._wav_file = wave.open(self._audio_buffer, "wb") # noqa: SIM115
89
- setup_wav_file(
90
- self._wav_file,
91
- rate=chunk.rate,
92
- channels=chunk.channels,
93
- sample_width=chunk.width,
94
- )
95
88
 
96
- self._wav_file.writeframes(chunk.audio)
89
+ chunk = AudioChunk.from_event(event)
90
+ chunk = self._audio_converter.convert(chunk)
91
+ self._audio_bytes += chunk.audio
97
92
  return True
98
93
 
99
94
  async def _handle_audio_stop(self) -> bool:
100
95
  """Handle audio stop event - transcribe the collected audio."""
101
96
  logger.debug("AudioStop")
102
97
 
103
- if self._wav_file is None or self._audio_buffer is None:
98
+ if not self._audio_bytes:
104
99
  logger.warning("AudioStop received but no audio data")
105
100
  await self.write_event(Transcript(text="").event())
106
101
  return False
107
102
 
108
- # Close WAV file
109
- self._wav_file.close()
110
- self._wav_file = None
111
-
112
- # Get audio data
113
- self._audio_buffer.seek(0)
114
- audio_data = self._audio_buffer.read()
115
- self._audio_buffer = None
103
+ # Wrap PCM in WAV format for the backend
104
+ audio_data = pcm_to_wav(
105
+ self._audio_bytes,
106
+ sample_rate=constants.AUDIO_RATE,
107
+ sample_width=constants.AUDIO_FORMAT_WIDTH,
108
+ channels=constants.AUDIO_CHANNELS,
109
+ )
110
+ self._audio_bytes = b""
116
111
 
117
112
  # Transcribe
118
113
  try:
@@ -5,14 +5,14 @@ from __future__ import annotations
5
5
  from contextlib import asynccontextmanager
6
6
  from typing import TYPE_CHECKING
7
7
 
8
- from wyoming.client import AsyncClient
9
-
10
8
  from agent_cli.core.utils import print_error_message
11
9
 
12
10
  if TYPE_CHECKING:
13
11
  import logging
14
12
  from collections.abc import AsyncGenerator
15
13
 
14
+ from wyoming.client import AsyncClient
15
+
16
16
 
17
17
  @asynccontextmanager
18
18
  async def wyoming_client_context(
@@ -40,6 +40,8 @@ async def wyoming_client_context(
40
40
  Exception: For other connection errors
41
41
 
42
42
  """
43
+ from wyoming.client import AsyncClient # noqa: PLC0415
44
+
43
45
  uri = f"tcp://{server_ip}:{server_port}"
44
46
  logger.info("Connecting to Wyoming %s server at %s", server_type, uri)
45
47
 
agent_cli/services/asr.py CHANGED
@@ -10,9 +10,6 @@ from functools import partial
10
10
  from pathlib import Path
11
11
  from typing import TYPE_CHECKING
12
12
 
13
- from wyoming.asr import Transcribe, Transcript, TranscriptChunk, TranscriptStart, TranscriptStop
14
- from wyoming.audio import AudioChunk, AudioStart, AudioStop
15
-
16
13
  from agent_cli import constants
17
14
  from agent_cli.core.audio import (
18
15
  open_audio_stream,
@@ -225,6 +222,9 @@ async def _send_audio(
225
222
  initial_prompt: str | None = None,
226
223
  ) -> None:
227
224
  """Read from mic and send to Wyoming server."""
225
+ from wyoming.asr import Transcribe # noqa: PLC0415
226
+ from wyoming.audio import AudioChunk, AudioStart, AudioStop # noqa: PLC0415
227
+
228
228
  # Build context with initial_prompt if provided
229
229
  context = {"initial_prompt": initial_prompt} if initial_prompt else None
230
230
  await client.write_event(Transcribe(context=context).event())
@@ -282,6 +282,13 @@ async def _receive_transcript(
282
282
  final_callback: Callable[[str], None] | None = None,
283
283
  ) -> str:
284
284
  """Receive transcription events and return the final transcript."""
285
+ from wyoming.asr import ( # noqa: PLC0415
286
+ Transcript,
287
+ TranscriptChunk,
288
+ TranscriptStart,
289
+ TranscriptStop,
290
+ )
291
+
285
292
  transcript_text = ""
286
293
  while True:
287
294
  event = await client.read_event()
@@ -370,6 +377,9 @@ async def _transcribe_recorded_audio_wyoming(
370
377
  **_kwargs: object,
371
378
  ) -> str:
372
379
  """Process pre-recorded audio data with Wyoming ASR server."""
380
+ from wyoming.asr import Transcribe # noqa: PLC0415
381
+ from wyoming.audio import AudioChunk, AudioStart, AudioStop # noqa: PLC0415
382
+
373
383
  try:
374
384
  async with wyoming_client_context(
375
385
  wyoming_asr_cfg.asr_wyoming_ip,
agent_cli/services/llm.py CHANGED
@@ -6,7 +6,6 @@ import sys
6
6
  import time
7
7
  from typing import TYPE_CHECKING
8
8
 
9
- import pyperclip
10
9
  from rich.live import Live
11
10
 
12
11
  from agent_cli.core.utils import console, live_timer, print_error_message, print_output_panel
@@ -156,6 +155,8 @@ async def get_llm_response(
156
155
  result_text = result.output
157
156
 
158
157
  if clipboard:
158
+ import pyperclip # noqa: PLC0415
159
+
159
160
  pyperclip.copy(result_text)
160
161
  logger.info("Copied result to clipboard.")
161
162