agent-cli 0.74.0__py3-none-any.whl → 0.75.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_cli/_extras.json +23 -8
- agent_cli/_requirements/whisper-transformers.txt +256 -0
- agent_cli/example-config.toml +4 -1
- agent_cli/install/extras.py +3 -1
- agent_cli/scripts/sync_extras.py +10 -0
- agent_cli/server/cli.py +45 -25
- agent_cli/server/whisper/backends/__init__.py +8 -1
- agent_cli/server/whisper/backends/mlx.py +10 -0
- agent_cli/server/whisper/backends/transformers.py +315 -0
- {agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/METADATA +12 -6
- {agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/RECORD +14 -12
- {agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/WHEEL +0 -0
- {agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/entry_points.txt +0 -0
- {agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/licenses/LICENSE +0 -0
agent_cli/_extras.json
CHANGED
@@ -6,12 +6,16 @@
     ]
   ],
   "faster-whisper": [
-    "
-    [
+    "Whisper ASR via CTranslate2",
+    [
+      "faster_whisper"
+    ]
   ],
   "kokoro": [
-    "
-    [
+    "Kokoro neural TTS (GPU)",
+    [
+      "kokoro"
+    ]
   ],
   "llm": [
     "LLM framework (pydantic-ai)",
@@ -28,12 +32,16 @@
     ]
   ],
   "mlx-whisper": [
-    "
-    [
+    "Whisper ASR for Apple Silicon",
+    [
+      "mlx_whisper"
+    ]
  ],
  "piper": [
-    "
-    [
+    "Piper TTS (CPU)",
+    [
+      "piper"
+    ]
  ],
  "rag": [
    "RAG proxy (ChromaDB, embeddings)",
@@ -60,6 +68,13 @@
      "silero_vad"
    ]
  ],
+  "whisper-transformers": [
+    "Whisper ASR via HuggingFace transformers",
+    [
+      "transformers",
+      "torch"
+    ]
+  ],
  "wyoming": [
    "Wyoming protocol support",
    [
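The registry above maps each extra to a `[description, [import_names]]` pair. A minimal sketch of how such a file can be consumed to probe whether an extra's modules are importable; the helper name and the path in the usage comment are illustrative, not part of the package:

```python
import importlib.util
import json
from pathlib import Path


def extra_is_installed(extras_file: Path, extra: str) -> bool:
    """Check whether every import name listed for `extra` resolves.

    Assumes the [description, [import_names]] layout shown in the diff above.
    """
    registry = json.loads(extras_file.read_text())
    _description, import_names = registry[extra]
    return all(importlib.util.find_spec(name) is not None for name in import_names)


# Hypothetical usage:
# extra_is_installed(Path("agent_cli/_extras.json"), "whisper-transformers")
```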
agent_cli/_requirements/whisper-transformers.txt
ADDED
@@ -0,0 +1,256 @@
+# This file was autogenerated by uv via the following command:
+# uv export --extra whisper-transformers --no-dev --no-emit-project --no-hashes
+annotated-doc==0.0.4
+    # via fastapi
+annotated-types==0.7.0
+    # via pydantic
+anyio==4.12.1
+    # via
+    #   httpx
+    #   starlette
+    #   watchfiles
+certifi==2026.1.4
+    # via
+    #   httpcore
+    #   httpx
+    #   requests
+    #   sentry-sdk
+charset-normalizer==3.4.4
+    # via requests
+click==8.3.1
+    # via
+    #   rich-toolkit
+    #   typer
+    #   typer-slim
+    #   uvicorn
+colorama==0.4.6 ; sys_platform == 'win32'
+    # via
+    #   click
+    #   tqdm
+    #   uvicorn
+dnspython==2.8.0
+    # via email-validator
+dotenv==0.9.9
+    # via agent-cli
+email-validator==2.3.0
+    # via
+    #   fastapi
+    #   pydantic
+fastapi==0.128.0
+    # via agent-cli
+fastapi-cli==0.0.20
+    # via fastapi
+fastapi-cloud-cli==0.10.1
+    # via fastapi-cli
+fastar==0.8.0
+    # via fastapi-cloud-cli
+filelock==3.20.3
+    # via
+    #   huggingface-hub
+    #   torch
+    #   transformers
+fsspec==2026.1.0
+    # via
+    #   huggingface-hub
+    #   torch
+h11==0.16.0
+    # via
+    #   httpcore
+    #   uvicorn
+hf-xet==1.2.0 ; platform_machine == 'aarch64' or platform_machine == 'amd64' or platform_machine == 'arm64' or platform_machine == 'x86_64'
+    # via huggingface-hub
+httpcore==1.0.9
+    # via httpx
+httptools==0.7.1
+    # via uvicorn
+httpx==0.28.1
+    # via
+    #   agent-cli
+    #   fastapi
+    #   fastapi-cloud-cli
+huggingface-hub==0.36.0
+    # via
+    #   tokenizers
+    #   transformers
+idna==3.11
+    # via
+    #   anyio
+    #   email-validator
+    #   httpx
+    #   requests
+jinja2==3.1.6
+    # via
+    #   fastapi
+    #   torch
+markdown-it-py==4.0.0
+    # via rich
+markupsafe==3.0.3
+    # via jinja2
+mdurl==0.1.2
+    # via markdown-it-py
+mpmath==1.3.0
+    # via sympy
+networkx==3.6.1
+    # via torch
+numpy==2.3.5
+    # via transformers
+nvidia-cublas-cu12==12.8.4.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cudnn-cu12
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cuda-cupti-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cuda-nvrtc-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cuda-runtime-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cudnn-cu12==9.10.2.21 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cufft-cu12==11.3.3.83 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cufile-cu12==1.13.1.3 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-curand-cu12==10.3.9.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cusolver-cu12==11.7.3.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-cusparse-cu12==12.5.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cusolver-cu12
+    #   torch
+nvidia-cusparselt-cu12==0.7.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nccl-cu12==2.27.5 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nvjitlink-cu12==12.8.93 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via
+    #   nvidia-cufft-cu12
+    #   nvidia-cusolver-cu12
+    #   nvidia-cusparse-cu12
+    #   torch
+nvidia-nvshmem-cu12==3.3.20 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+nvidia-nvtx-cu12==12.8.90 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+packaging==25.0
+    # via
+    #   huggingface-hub
+    #   transformers
+psutil==7.2.1 ; sys_platform == 'win32'
+    # via agent-cli
+pydantic==2.12.5
+    # via
+    #   agent-cli
+    #   fastapi
+    #   fastapi-cloud-cli
+    #   pydantic-extra-types
+    #   pydantic-settings
+pydantic-core==2.41.5
+    # via pydantic
+pydantic-extra-types==2.11.0
+    # via fastapi
+pydantic-settings==2.12.0
+    # via fastapi
+pygments==2.19.2
+    # via rich
+pyperclip==1.11.0
+    # via agent-cli
+python-dotenv==1.2.1
+    # via
+    #   dotenv
+    #   pydantic-settings
+    #   uvicorn
+python-multipart==0.0.21
+    # via fastapi
+pyyaml==6.0.3
+    # via
+    #   huggingface-hub
+    #   transformers
+    #   uvicorn
+regex==2026.1.15
+    # via transformers
+requests==2.32.5
+    # via
+    #   huggingface-hub
+    #   transformers
+rich==14.2.0
+    # via
+    #   agent-cli
+    #   rich-toolkit
+    #   typer
+    #   typer-slim
+rich-toolkit==0.17.1
+    # via
+    #   fastapi-cli
+    #   fastapi-cloud-cli
+rignore==0.7.6
+    # via fastapi-cloud-cli
+safetensors==0.7.0
+    # via transformers
+sentry-sdk==2.49.0
+    # via fastapi-cloud-cli
+setproctitle==1.3.7
+    # via agent-cli
+setuptools==80.9.0 ; python_full_version >= '3.12'
+    # via torch
+shellingham==1.5.4
+    # via
+    #   typer
+    #   typer-slim
+starlette==0.50.0
+    # via fastapi
+sympy==1.14.0
+    # via torch
+tokenizers==0.22.2
+    # via transformers
+torch==2.9.1
+    # via agent-cli
+tqdm==4.67.1
+    # via
+    #   huggingface-hub
+    #   transformers
+transformers==4.57.5
+    # via agent-cli
+triton==3.5.1 ; platform_machine == 'x86_64' and sys_platform == 'linux'
+    # via torch
+typer==0.21.1
+    # via
+    #   agent-cli
+    #   fastapi-cli
+    #   fastapi-cloud-cli
+typer-slim==0.21.1
+    # via agent-cli
+typing-extensions==4.15.0
+    # via
+    #   anyio
+    #   fastapi
+    #   huggingface-hub
+    #   pydantic
+    #   pydantic-core
+    #   pydantic-extra-types
+    #   rich-toolkit
+    #   starlette
+    #   torch
+    #   typer
+    #   typer-slim
+    #   typing-inspection
+typing-inspection==0.4.2
+    # via
+    #   pydantic
+    #   pydantic-settings
+urllib3==2.3.0
+    # via
+    #   requests
+    #   sentry-sdk
+uvicorn==0.40.0
+    # via
+    #   fastapi
+    #   fastapi-cli
+    #   fastapi-cloud-cli
+uvloop==0.22.1 ; platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'
+    # via uvicorn
+watchfiles==1.1.1
+    # via uvicorn
+websockets==15.0.1
+    # via uvicorn
agent_cli/example-config.toml
CHANGED
@@ -13,7 +13,10 @@
 [defaults]
 
 # --- Provider Selection ---
-# Select the default provider for each service
+# Select the default provider for each service.
+# LLM: "ollama", "openai", or "gemini"
+# ASR: "wyoming", "openai", or "gemini"
+# TTS: "wyoming", "openai", "kokoro", or "gemini"
 llm-provider = "ollama" # "local" still works as a deprecated alias
 tts-provider = "wyoming"
 
agent_cli/install/extras.py
CHANGED
@@ -132,7 +132,8 @@ def install_extras(
         list[str] | None,
         typer.Argument(
             help="Extras to install: `rag`, `memory`, `vad`, `audio`, `piper`, `kokoro`, "
-            "`faster-whisper`, `mlx-whisper`, `
+            "`faster-whisper`, `mlx-whisper`, `whisper-transformers`, `wyoming`, `server`, "
+            "`speed`, `llm`",
         ),
     ] = None,
     list_extras: Annotated[
@@ -163,6 +164,7 @@ def install_extras(
     - `kokoro` - Kokoro neural TTS engine
     - `faster-whisper` - Whisper ASR for CUDA/CPU
     - `mlx-whisper` - Whisper ASR for Apple Silicon
+    - `whisper-transformers` - Whisper ASR via HuggingFace transformers (safetensors)
     - `wyoming` - Wyoming protocol for ASR/TTS servers
     - `server` - FastAPI server components
     - `speed` - Audio speed adjustment
agent_cli/scripts/sync_extras.py
CHANGED
@@ -43,6 +43,16 @@ EXTRA_METADATA: dict[str, tuple[str, list[str]]] = {
     "tts-kokoro": ("Kokoro neural TTS", ["kokoro"]),
     "server": ("FastAPI server components", ["fastapi"]),
     "speed": ("Audio speed adjustment (audiostretchy)", ["audiostretchy"]),
+    # Whisper backends
+    "faster-whisper": ("Whisper ASR via CTranslate2", ["faster_whisper"]),
+    "mlx-whisper": ("Whisper ASR for Apple Silicon", ["mlx_whisper"]),
+    "whisper-transformers": (
+        "Whisper ASR via HuggingFace transformers",
+        ["transformers", "torch"],
+    ),
+    # TTS backends
+    "piper": ("Piper TTS (CPU)", ["piper"]),
+    "kokoro": ("Kokoro neural TTS (GPU)", ["kokoro"]),
 }
 
 
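`EXTRA_METADATA` mirrors the `[description, [import_names]]` entries that landed in `_extras.json` above. A plausible sketch of the serialization step; the writer function is hypothetical, since the rest of `sync_extras.py` is not shown in this diff:

```python
import json
from pathlib import Path

# Subset of the mapping added in the hunk above.
EXTRA_METADATA: dict[str, tuple[str, list[str]]] = {
    "whisper-transformers": (
        "Whisper ASR via HuggingFace transformers",
        ["transformers", "torch"],
    ),
}


def write_extras_json(path: Path) -> None:
    """Serialize EXTRA_METADATA into the [description, [import_names]] JSON layout."""
    payload = {name: [desc, mods] for name, (desc, mods) in EXTRA_METADATA.items()}
    path.write_text(json.dumps(payload, indent=2, sort_keys=True) + "\n")
```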
agent_cli/server/cli.py
CHANGED
@@ -157,36 +157,43 @@ def _download_tts_models(
 def _check_whisper_deps(backend: str, *, download_only: bool = False) -> None:
     """Check that Whisper dependencies are available."""
     _check_server_deps()
-    if
-    if not _has("
+    if backend == "mlx":
+        if not _has("mlx_whisper"):
             err_console.print(
-                "[bold red]Error:[/bold red]
-                "Run: [cyan]pip install
-                "or [cyan]uv sync --extra whisper[/cyan]",
+                "[bold red]Error:[/bold red] MLX Whisper backend requires mlx-whisper. "
+                "Run: [cyan]pip install mlx-whisper[/cyan]",
             )
             raise typer.Exit(1)
         return
 
-    if backend == "
-        if not _has("
+    if backend == "transformers":
+        if not _has("transformers") or not _has("torch"):
             err_console.print(
-                "[bold red]Error:[/bold red]
-                "Run: [cyan]pip install
+                "[bold red]Error:[/bold red] Transformers backend requires transformers and torch. "
+                "Run: [cyan]pip install agent-cli\\[whisper-transformers][/cyan] "
+                "or [cyan]uv sync --extra whisper-transformers[/cyan]",
             )
             raise typer.Exit(1)
         return
 
     if not _has("faster_whisper"):
-
-
-
-
-
+        if download_only:
+            err_console.print(
+                "[bold red]Error:[/bold red] faster-whisper is required for --download-only. "
+                "Run: [cyan]pip install agent-cli\\[faster-whisper][/cyan] "
+                "or [cyan]uv sync --extra faster-whisper[/cyan]",
+            )
+        else:
+            err_console.print(
+                "[bold red]Error:[/bold red] Whisper dependencies not installed. "
+                "Run: [cyan]pip install agent-cli\\[faster-whisper][/cyan] "
+                "or [cyan]uv sync --extra faster-whisper[/cyan]",
+            )
         raise typer.Exit(1)
 
 
 @app.command("whisper")
-@requires_extras("server", "faster-whisper|mlx-whisper", "wyoming")
+@requires_extras("server", "faster-whisper|mlx-whisper|whisper-transformers", "wyoming")
 def whisper_cmd(  # noqa: PLR0912, PLR0915
     model: Annotated[
         list[str] | None,
@@ -299,7 +306,7 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
         "-b",
         help=(
             "Inference backend: `auto` (faster-whisper on CUDA/CPU, MLX on Apple Silicon), "
-            "`faster-whisper`, `mlx`"
+            "`faster-whisper`, `mlx`, `transformers` (HuggingFace, supports safetensors)"
         ),
     ),
 ] = "auto",
@@ -331,7 +338,7 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
     # Setup Rich logging for consistent output
     setup_rich_logging(log_level)
 
-    valid_backends = ("auto", "faster-whisper", "mlx")
+    valid_backends = ("auto", "faster-whisper", "mlx", "transformers")
     if backend not in valid_backends:
         err_console.print(
             f"[bold red]Error:[/bold red] --backend must be one of: {', '.join(valid_backends)}",
@@ -339,7 +346,7 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
         raise typer.Exit(1)
 
     resolved_backend = backend
-    if backend == "auto"
+    if backend == "auto":
         from agent_cli.server.whisper.backends import detect_backend  # noqa: PLC0415
 
         resolved_backend = detect_backend()
@@ -370,13 +377,26 @@ def whisper_cmd(  # noqa: PLR0912, PLR0915
     for model_name in model:
         console.print(f" Downloading [cyan]{model_name}[/cyan]...")
         try:
-
-
-
-
-
-
-
+            if resolved_backend == "transformers":
+                from agent_cli.server.whisper.backends.transformers import (  # noqa: PLC0415
+                    download_model as download_transformers_model,
+                )
+
+                download_transformers_model(model_name, cache_dir=cache_dir)
+            elif resolved_backend == "mlx":
+                from agent_cli.server.whisper.backends.mlx import (  # noqa: PLC0415
+                    download_model as download_mlx_model,
+                )
+
+                download_mlx_model(model_name)
+            else:
+                from faster_whisper import WhisperModel  # noqa: PLC0415
+
+                _ = WhisperModel(
+                    model_name,
+                    device="cpu",  # Don't need GPU for download
+                    download_root=str(cache_dir) if cache_dir else None,
+                )
             console.print(f" [green]✓[/green] Downloaded {model_name}")
         except Exception as e:
             err_console.print(f" [red]✗[/red] Failed to download {model_name}: {e}")
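With `--download-only`, the command now dispatches on the resolved backend, and the transformers branch funnels into the new `download_model` helper. A sketch using the signature visible in this diff; the cache path is only an example:

```python
from pathlib import Path

from agent_cli.server.whisper.backends.transformers import download_model

# Resolves a short name like "turbo" to "openai/whisper-large-v3-turbo"
# and snapshots the repo into the given cache directory.
repo = download_model("turbo", cache_dir=Path.home() / ".cache" / "whisper")
print(repo)  # -> openai/whisper-large-v3-turbo
```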
agent_cli/server/whisper/backends/__init__.py
CHANGED
@@ -17,7 +17,7 @@ from agent_cli.server.whisper.backends.base import (
 
 logger = logging.getLogger(__name__)
 
-BackendType = Literal["faster-whisper", "mlx", "auto"]
+BackendType = Literal["faster-whisper", "mlx", "transformers", "auto"]
 
 
 def detect_backend() -> Literal["faster-whisper", "mlx"]:
@@ -76,6 +76,13 @@ def create_backend(
 
         return FasterWhisperBackend(config)
 
+    if backend_type == "transformers":
+        from agent_cli.server.whisper.backends.transformers import (  # noqa: PLC0415
+            TransformersWhisperBackend,
+        )
+
+        return TransformersWhisperBackend(config)
+
     msg = f"Unknown backend type: {backend_type}"
     raise ValueError(msg)
 
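Callers select the new backend through `create_backend`. A hedged usage sketch: the argument order follows the `backend_type`/`config` names in the hunk above, and `BackendConfig`'s field names are inferred from the attribute accesses in `transformers.py` (`model_name`, `device`, `cache_dir`), since its definition is not part of this diff:

```python
from agent_cli.server.whisper.backends import create_backend
from agent_cli.server.whisper.backends.base import BackendConfig

# Field names and argument order are assumptions based on usage elsewhere in this diff.
config = BackendConfig(model_name="large-v3-turbo", device="auto", cache_dir=None)
backend = create_backend("transformers", config)  # -> TransformersWhisperBackend
```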
agent_cli/server/whisper/backends/mlx.py
CHANGED
@@ -55,6 +55,16 @@ def _resolve_mlx_model_name(model_name: str) -> str:
     return model_name
 
 
+def download_model(model_name: str) -> str:
+    """Download an MLX Whisper model and return the resolved repo name."""
+    import mlx.core as mx  # noqa: PLC0415
+    from mlx_whisper.transcribe import ModelHolder  # noqa: PLC0415
+
+    resolved_model = _resolve_mlx_model_name(model_name)
+    ModelHolder.get_model(resolved_model, mx.float16)
+    return resolved_model
+
+
 def _pcm_to_float(audio_bytes: bytes) -> NDArray[np.float32]:
     """Convert 16-bit PCM audio bytes to float32 array normalized to [-1, 1]."""
     import numpy as np  # noqa: PLC0415
agent_cli/server/whisper/backends/transformers.py
ADDED
@@ -0,0 +1,315 @@
+"""Transformers Whisper backend for HuggingFace models with safetensors support."""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import tempfile
+import time
+import wave
+from concurrent.futures import ProcessPoolExecutor
+from dataclasses import dataclass
+from multiprocessing import get_context
+from pathlib import Path
+from typing import Any, Literal
+
+from agent_cli.core.process import set_process_title
+from agent_cli.server.whisper.backends.base import (
+    BackendConfig,
+    TranscriptionResult,
+)
+
+logger = logging.getLogger(__name__)
+
+# Model name mapping: canonical name -> HuggingFace repo
+_MODEL_MAP: dict[str, str] = {
+    "tiny": "openai/whisper-tiny",
+    "tiny.en": "openai/whisper-tiny.en",
+    "base": "openai/whisper-base",
+    "base.en": "openai/whisper-base.en",
+    "small": "openai/whisper-small",
+    "small.en": "openai/whisper-small.en",
+    "medium": "openai/whisper-medium",
+    "medium.en": "openai/whisper-medium.en",
+    "large": "openai/whisper-large",
+    "large-v2": "openai/whisper-large-v2",
+    "large-v3": "openai/whisper-large-v3",
+    "large-v3-turbo": "openai/whisper-large-v3-turbo",
+    "turbo": "openai/whisper-large-v3-turbo",
+    # Distil variants (smaller, faster)
+    "distil-large-v2": "distil-whisper/distil-large-v2",
+    "distil-large-v3": "distil-whisper/distil-large-v3",
+    "distil-medium.en": "distil-whisper/distil-medium.en",
+    "distil-small.en": "distil-whisper/distil-small.en",
+}
+
+
+def _resolve_model_name(model_name: str) -> str:
+    """Resolve a model name to a HuggingFace repo."""
+    if "/" in model_name:
+        return model_name
+    return _MODEL_MAP.get(model_name, f"openai/whisper-{model_name}")
+
+
+def download_model(model_name: str, cache_dir: Path | None = None) -> str:
+    """Download a Whisper model from the HuggingFace Hub.
+
+    Returns the resolved repo name.
+    """
+    from huggingface_hub import snapshot_download  # noqa: PLC0415
+
+    resolved_model = _resolve_model_name(model_name)
+    snapshot_download(
+        repo_id=resolved_model,
+        cache_dir=str(cache_dir) if cache_dir else None,
+    )
+    return resolved_model
+
+
+# --- Subprocess state (only used within subprocess worker) ---
+
+
+@dataclass
+class _SubprocessState:
+    """Container for subprocess-local state. Not shared with main process."""
+
+    model: Any = None
+    processor: Any = None
+    device: str | None = None
+
+
+_state = _SubprocessState()
+
+
+# --- Subprocess worker functions (run in isolated process) ---
+
+
+def _load_model_in_subprocess(
+    model_name: str,
+    device: str,
+    download_root: str | None,
+) -> str:
+    """Load model in subprocess. Returns actual device string."""
+    import torch  # noqa: PLC0415
+    from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor  # noqa: PLC0415
+
+    set_process_title("whisper-transformers")
+
+    # Resolve device
+    if device == "auto":
+        if torch.cuda.is_available():
+            device = "cuda"
+        elif hasattr(torch.backends, "mps") and torch.backends.mps.is_available():
+            device = "mps"
+        else:
+            device = "cpu"
+
+    _state.processor = AutoProcessor.from_pretrained(
+        model_name,
+        cache_dir=download_root,
+    )
+    dtype = torch.float16 if device != "cpu" else torch.float32
+    _state.model = AutoModelForSpeechSeq2Seq.from_pretrained(
+        model_name,
+        cache_dir=download_root,
+        torch_dtype=dtype,
+        low_cpu_mem_usage=True,
+    )
+    _state.model.to(device)
+    _state.model.eval()
+    _state.device = device
+
+    return device
+
+
+def _transcribe_in_subprocess(kwargs: dict[str, Any]) -> dict[str, Any]:
+    """Run transcription in subprocess. Reuses model from _state."""
+    import torch  # noqa: PLC0415
+
+    if _state.model is None or _state.processor is None:
+        msg = "Model not loaded in subprocess. Call _load_model_in_subprocess first."
+        raise RuntimeError(msg)
+
+    # Parse WAV and extract audio
+    with wave.open(kwargs.pop("wav_path"), "rb") as wav_file:
+        sample_rate = wav_file.getframerate()
+        audio_bytes = wav_file.readframes(wav_file.getnframes())
+        duration = wav_file.getnframes() / sample_rate
+
+    # Convert to float tensor (copy buffer to avoid non-writable tensor warning)
+    import numpy as np  # noqa: PLC0415
+
+    audio_array = np.frombuffer(audio_bytes, dtype=np.int16).astype(np.float32) / 32768.0
+    audio_tensor = torch.from_numpy(audio_array)
+
+    # Process audio
+    inputs = _state.processor(
+        audio_tensor,
+        sampling_rate=sample_rate,
+        return_tensors="pt",
+    )
+    inputs = {k: v.to(_state.device) for k, v in inputs.items()}
+
+    language = kwargs.get("language")
+    task = kwargs.get("task", "transcribe")
+    initial_prompt = kwargs.get("initial_prompt")
+
+    # Build generate arguments - use language/task directly instead of deprecated forced_decoder_ids
+    generate_args: dict[str, Any] = {
+        **inputs,
+        "num_beams": kwargs.get("beam_size", 5),
+        "task": task,
+        "return_timestamps": False,
+    }
+
+    # Add attention_mask if available (avoids warning about pad token)
+    if "attention_mask" not in generate_args:
+        generate_args["attention_mask"] = inputs.get(
+            "attention_mask",
+            torch.ones_like(inputs["input_features"][:, 0, :]),
+        )
+
+    if language:
+        generate_args["language"] = language
+
+    if initial_prompt:
+        prompt_ids = (
+            _state.processor.tokenizer(
+                initial_prompt,
+                return_tensors="pt",
+                add_special_tokens=False,
+            )
+            .input_ids[0]
+            .to(_state.device)
+        )
+        generate_args["prompt_ids"] = prompt_ids
+
+    with torch.no_grad():
+        generated_ids = _state.model.generate(**generate_args)
+    text = _state.processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
+
+    return {
+        "text": text.strip(),
+        "language": language or "en",
+        "language_probability": 1.0 if language else 0.95,
+        "duration": duration,
+        "segments": [],
+    }
+
+
+class TransformersWhisperBackend:
+    """Whisper backend using HuggingFace transformers.
+
+    Supports loading models from safetensors format.
+    Uses subprocess isolation for memory management.
+    """
+
+    def __init__(self, config: BackendConfig) -> None:
+        """Initialize the backend."""
+        self._config = config
+        self._resolved_model = _resolve_model_name(config.model_name)
+        self._executor: ProcessPoolExecutor | None = None
+        self._device: str | None = None
+
+    @property
+    def is_loaded(self) -> bool:
+        """Check if the model is loaded."""
+        return self._executor is not None
+
+    @property
+    def device(self) -> str | None:
+        """Get the device the model is on."""
+        return self._device
+
+    async def load(self) -> float:
+        """Start subprocess and load model."""
+        logger.debug(
+            "Starting transformers subprocess for model %s (resolved: %s, device=%s)",
+            self._config.model_name,
+            self._resolved_model,
+            self._config.device,
+        )
+
+        start_time = time.time()
+
+        ctx = get_context("spawn")
+        self._executor = ProcessPoolExecutor(max_workers=1, mp_context=ctx)
+
+        download_root = str(self._config.cache_dir) if self._config.cache_dir else None
+        loop = asyncio.get_running_loop()
+        self._device = await loop.run_in_executor(
+            self._executor,
+            _load_model_in_subprocess,
+            self._resolved_model,
+            self._config.device,
+            download_root,
+        )
+
+        load_duration = time.time() - start_time
+        logger.info(
+            "Model %s loaded on %s in %.2fs",
+            self._config.model_name,
+            self._device,
+            load_duration,
+        )
+        return load_duration
+
+    async def unload(self) -> None:
+        """Shutdown subprocess, releasing ALL memory."""
+        if self._executor is None:
+            return
+        logger.debug(
+            "Shutting down transformers subprocess for model %s",
+            self._config.model_name,
+        )
+        self._executor.shutdown(wait=False, cancel_futures=True)
+        self._executor = None
+        self._device = None
+        logger.info("Model %s unloaded (subprocess terminated)", self._config.model_name)
+
+    async def transcribe(
+        self,
+        audio: bytes,
+        *,
+        source_filename: str | None = None,  # noqa: ARG002
+        language: str | None = None,
+        task: Literal["transcribe", "translate"] = "transcribe",
+        initial_prompt: str | None = None,
+        temperature: float = 0.0,  # noqa: ARG002 - not used by transformers
+        vad_filter: bool = True,  # noqa: ARG002 - not supported
+        word_timestamps: bool = False,  # noqa: ARG002 - not supported
+    ) -> TranscriptionResult:
+        """Transcribe audio using transformers in subprocess."""
+        if self._executor is None:
+            msg = "Model not loaded. Call load() first."
+            raise RuntimeError(msg)
+
+        # Write audio to temp file for wave parsing in subprocess
+        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
+            tmp.write(audio)
+            tmp_path = tmp.name
+
+        kwargs: dict[str, Any] = {
+            "wav_path": tmp_path,
+            "language": language,
+            "task": task,
+            "initial_prompt": initial_prompt,
+        }
+
+        try:
+            loop = asyncio.get_running_loop()
+            result = await loop.run_in_executor(
+                self._executor,
+                _transcribe_in_subprocess,
+                kwargs,
+            )
+        finally:
+            Path(tmp_path).unlink(missing_ok=True)
+
+        return TranscriptionResult(
+            text=result["text"],
+            language=result["language"],
+            language_probability=result["language_probability"],
+            duration=result["duration"],
+            segments=result["segments"],
+        )
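Because `load()` and `transcribe()` are coroutines that hand work to a single-worker, spawn-context `ProcessPoolExecutor`, a caller drives the backend from an event loop. A hedged end-to-end sketch, again assuming the `BackendConfig` fields used above and a 16-bit PCM WAV input as `_transcribe_in_subprocess` expects:

```python
import asyncio
from pathlib import Path

from agent_cli.server.whisper.backends.base import BackendConfig
from agent_cli.server.whisper.backends.transformers import TransformersWhisperBackend


async def main() -> None:
    backend = TransformersWhisperBackend(
        BackendConfig(model_name="distil-small.en", device="auto", cache_dir=None),
    )
    await backend.load()  # spawns the worker process and loads weights there
    wav_bytes = Path("sample.wav").read_bytes()  # 16-bit PCM WAV
    result = await backend.transcribe(wav_bytes, language="en")
    print(result.text)
    await backend.unload()  # terminates the subprocess, releasing all memory


asyncio.run(main())
```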
{agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: agent-cli
-Version: 0.
+Version: 0.75.0
 Summary: A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.
 Project-URL: Homepage, https://github.com/basnijholt/agent-cli
 Author-email: Bas Nijholt <bas@nijho.lt>
@@ -84,6 +84,10 @@ Requires-Dist: pytest-timeout; extra == 'test'
 Requires-Dist: pytest>=7.0.0; extra == 'test'
 Provides-Extra: vad
 Requires-Dist: silero-vad-lite>=0.2.1; extra == 'vad'
+Provides-Extra: whisper-transformers
+Requires-Dist: fastapi[standard]; extra == 'whisper-transformers'
+Requires-Dist: torch>=2.0.0; extra == 'whisper-transformers'
+Requires-Dist: transformers>=4.30.0; extra == 'whisper-transformers'
 Provides-Extra: wyoming
 Requires-Dist: wyoming>=1.5.2; extra == 'wyoming'
 Description-Content-Type: text/markdown
@@ -424,7 +428,7 @@ Our installation scripts automatically handle all dependencies:
 |---------|---------|-----------------|
 | **[Ollama](https://ollama.ai/)** | Local LLM for text processing | ✅ Yes, with default model |
 | **[Wyoming Faster Whisper](https://github.com/rhasspy/wyoming-faster-whisper)** | Speech-to-text | ✅ Yes, via `uvx` |
-| **[`agent-cli server whisper`](docs/commands/server/whisper.md)** | Speech-to-text (alternative) | ✅ Built-in, `pip install "agent-cli[whisper]"` |
+| **[`agent-cli server whisper`](docs/commands/server/whisper.md)** | Speech-to-text (alternative) | ✅ Built-in, `pip install "agent-cli[faster-whisper]"` |
 | **[Wyoming Piper](https://github.com/rhasspy/wyoming-piper)** | Text-to-speech | ✅ Yes, via `uvx` |
 | **[Kokoro-FastAPI](https://github.com/remsky/Kokoro-FastAPI)** | Premium TTS (optional) | ⚙️ Can be added later |
 | **[Wyoming openWakeWord](https://github.com/rhasspy/wyoming-openwakeword)** | Wake word detection | ✅ Yes, for `assistant` |
@@ -518,6 +522,7 @@ agent-cli install-extras rag memory vad
 • kokoro - Kokoro neural TTS engine
 • faster-whisper - Whisper ASR for CUDA/CPU
 • mlx-whisper - Whisper ASR for Apple Silicon
+• whisper-transformers - Whisper ASR via HuggingFace transformers (safetensors)
 • wyoming - Wyoming protocol for ASR/TTS servers
 • server - FastAPI server components
 • speed - Audio speed adjustment
@@ -534,7 +539,8 @@ agent-cli install-extras rag memory vad
 
 ╭─ Arguments ────────────────────────────────────────────────────────────────────────────╮
 │ extras [EXTRAS]... Extras to install: rag, memory, vad, audio, piper, kokoro, │
-│ faster-whisper, mlx-whisper,
+│ faster-whisper, mlx-whisper, whisper-transformers, wyoming, │
+│ server, speed, llm │
 ╰────────────────────────────────────────────────────────────────────────────────────────╯
 ╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
 │ --list -l Show available extras with descriptions (what each one enables) │
@@ -630,8 +636,8 @@ the `[defaults]` section of your configuration file.
 ```toml
 [defaults]
 # llm_provider = "ollama" # 'ollama', 'openai', or 'gemini'
-# asr_provider = "wyoming" # 'wyoming' or '
-# tts_provider = "wyoming" # 'wyoming', 'openai', or '
+# asr_provider = "wyoming" # 'wyoming', 'openai', or 'gemini'
+# tts_provider = "wyoming" # 'wyoming', 'openai', 'kokoro', or 'gemini'
 # openai_api_key = "sk-..."
 # gemini_api_key = "..."
 ```
@@ -643,7 +649,7 @@ the `[defaults]` section of your configuration file.
 **Workflow:** This is a simple, one-shot command.
 
 1. It reads text from your system clipboard (or from a direct argument).
-2. It sends the text to
+2. It sends the text to your configured LLM provider (default: Ollama) with a prompt to perform only technical corrections.
 3. The corrected text is copied back to your clipboard, replacing the original.
 
 **How to Use It:** This tool is ideal for integrating with a system-wide hotkey.
{agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/RECORD
CHANGED
@@ -1,6 +1,6 @@
 agent_cli/__init__.py,sha256=-bo57j_5TsCug2tGHh7wClAGDhzN239639K40pgVh4g,187
 agent_cli/__main__.py,sha256=2wx_SxA8KRdejM-hBFLN8JTR2rIgtwnDH03MPAbJH5U,106
-agent_cli/_extras.json,sha256=
+agent_cli/_extras.json,sha256=bLtE8Xit-AFL4drPlcaA5jTg5oxGbUV9HAuCd9s-2JE,1200
 agent_cli/_tools.py,sha256=u9Ww-k-sbwFnMTW8sreFGd71nJP6o5hKcM0Zd_D9GZk,12136
 agent_cli/api.py,sha256=FQ_HATc7DaedbEFQ275Z18wV90tkDByD_9x_K0wdSLQ,456
 agent_cli/cli.py,sha256=O3b5Bgv6mjzSIMKikRfeUEg1SSVXhCskLatltbx0ERg,3923
@@ -8,7 +8,7 @@ agent_cli/config.py,sha256=dgwDV6chrQzGnVZIJ0OOg26jFKLCGIInC4Q9oXcj3rM,15413
 agent_cli/config_cmd.py,sha256=PkFY-U09LRIFYrHL_kG4_Ge6DjCWFe3GkO_uiIBMTgI,10359
 agent_cli/constants.py,sha256=-Q17N6qKIGqPDsu3FxpIKP33G0Cs0RUJlHwYNHxVxek,843
 agent_cli/docs_gen.py,sha256=ZX2GYHNumpLhdAEc_4Hy6xeAahAzsEVVnsXUojMYVNY,13885
-agent_cli/example-config.toml,sha256=
+agent_cli/example-config.toml,sha256=upxiTAi8FV5rjrm5IBrnz4YDDC5nXA-DUzYBvWoPHM4,7616
 agent_cli/opts.py,sha256=XgVFv-ip5lkFJNyBGHewCBQc4YaLJUSijIsP1qiqcts,13405
 agent_cli/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 agent_cli/_requirements/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -23,6 +23,7 @@ agent_cli/_requirements/rag.txt,sha256=_MPH-PuDSU90J9EXHTJzN9M34ogmkHhsJ2e-Cimir
 agent_cli/_requirements/server.txt,sha256=v29ib07fYE5_lbL00ULOgS13XA5NAOnLq-lExJZ0zbw,3004
 agent_cli/_requirements/speed.txt,sha256=KwBTrZFXWtgwJ5zrcNtm45zfqvNK0trcR1SbV-wDFp4,1464
 agent_cli/_requirements/vad.txt,sha256=Jqw49D5xleqrFyv348GjqRmflewOOEYJklx7b9GbNpY,1359
+agent_cli/_requirements/whisper-transformers.txt,sha256=akxlIsNmHlldeyYJL3_ixCI35oSwWPURv9shiVZlNWs,6097
 agent_cli/_requirements/wyoming.txt,sha256=qsse6lSGYKxnzOYXpS6zIkZ7OG4-E4GH13gfBPuvoY0,1355
 agent_cli/agents/__init__.py,sha256=c1rnncDW5pBvP6BiLzFVpLWDNZzFRaUA7-a97avFVAs,321
 agent_cli/agents/_voice_agent_common.py,sha256=PUAztW84Xf9U7d0C_K5cL7I8OANIE1H6M8dFD_cRqps,4360
@@ -92,7 +93,7 @@ agent_cli/dev/terminals/warp.py,sha256=j-Jvz_BbWYC3QfLrvl4CbDh03c9OGRFmuCzjyB2ud
 agent_cli/dev/terminals/zellij.py,sha256=GnQnopimb9XH67CZGHjnbVWpVSWhaLCATGJizCT5TkY,2321
 agent_cli/install/__init__.py,sha256=JQPrOrtdNd1Y1NmQDkb3Nmm1qdyn3kPjhQwy9D8ryjI,124
 agent_cli/install/common.py,sha256=WvnmcjnFTW0d1HZrKVGzj5Tg3q8Txk_ZOdc4a1MBFWI,3121
-agent_cli/install/extras.py,sha256=
+agent_cli/install/extras.py,sha256=xQ-0A-8X1n6X9ufLG2wPg6UA0Y34M_Zwc27Btc_QKvE,7438
 agent_cli/install/hotkeys.py,sha256=Y7jjtbyjVzIXL1_aczJmOyjL0ud2epbrFbzuWlObqZY,2324
 agent_cli/install/services.py,sha256=mgFjNqvvZ9U2dJ_PcEVWcenlaOtdIZ5a-RyDofDqooY,4209
 agent_cli/memory/__init__.py,sha256=8XNpVzP-qjF8o49A_eXsH_Rbp_FmxTIcknnvxq7vHms,162
@@ -140,7 +141,7 @@ agent_cli/scripts/setup-macos.sh,sha256=iKWhhONLGDTqYawSDqutnl0mfQomSVPPAsx09-0E
 agent_cli/scripts/setup-windows.ps1,sha256=NhyxOuwCjjSw24q2QOqggATos_n06DDbfvMQWuAB3tM,2938
 agent_cli/scripts/start-all-services-windows.ps1,sha256=uOODaPFzniEU7asDgMyf5MEOWcEFsGg_mCLLlDgKoa8,2643
 agent_cli/scripts/start-all-services.sh,sha256=c6pjXoyoQkeF-cYpldeMMo38XxRMmS43FHG5w3ElLxg,7756
-agent_cli/scripts/sync_extras.py,sha256=
+agent_cli/scripts/sync_extras.py,sha256=bxfj88pRN2Uojyd8ubhtlzFCMrXvXmaXlFjzdNLiayw,5179
 agent_cli/scripts/.runtime/.gitkeep,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 agent_cli/scripts/linux-hotkeys/README.md,sha256=OW48Xyv096XkUosSJkzED_nnEEncSzhl87FNgEfq8wg,2037
 agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh,sha256=sme-dil3EU4nkdRwxSvARr-hBN9UjrU1IFabLCrvwl8,1251
@@ -157,7 +158,7 @@ agent_cli/scripts/nvidia-asr-server/server.py,sha256=kPNQIVF3exblvqMtIVk38Y6sZy2
 agent_cli/scripts/nvidia-asr-server/shell.nix,sha256=IT20j5YNj_wc7MdXi7ndogGodDNSGwyq8G0bNoZEpmg,1003
 agent_cli/scripts/nvidia-asr-server/uv.lock,sha256=5WWaqWOuV_moMPC-LIZK-A-Y5oaHr1tUn_vbR-IupzY,728608
 agent_cli/server/__init__.py,sha256=NZuJHlLHck9KWrepNZHrJONptYCQI9P-uTqknSFI5Ds,71
-agent_cli/server/cli.py,sha256=
+agent_cli/server/cli.py,sha256=dBg9Iy8BGthxvW_ImYweauQJVKdnqwUkl0EbFvOR-K4,27417
 agent_cli/server/common.py,sha256=hBBp6i-2-yhDY260ffwmFBg_ndcoT5SNcfa6uFyP7Vc,6391
 agent_cli/server/model_manager.py,sha256=93l_eeZeqnPALyDIK24or61tvded9TbM8tnde0okVjY,9225
 agent_cli/server/model_registry.py,sha256=KrRV1XxbFYuXu5rJlHFh6PTl_2BKiWnWsaNrf-0c6wQ,6988
@@ -179,18 +180,19 @@ agent_cli/server/whisper/languages.py,sha256=Tv3qsIOSQQLxw-v5Wy41jSS6uHG_YBiG-T2
 agent_cli/server/whisper/model_manager.py,sha256=LI92mkueu8o8m6AhzlUaaIWygnZucJa295-j7ymx7Ss,4925
 agent_cli/server/whisper/model_registry.py,sha256=qoRkB0ex6aRtUlsUN5BGik-oIZlwJbVHGQKaCbf_yVg,789
 agent_cli/server/whisper/wyoming_handler.py,sha256=HjN565YfDHeVfaGjQfoy9xjCZPx_TvYvjRYgbKn3aOI,6634
-agent_cli/server/whisper/backends/__init__.py,sha256=
+agent_cli/server/whisper/backends/__init__.py,sha256=YzS5g1PAlKi6k00u0iEH_0uRclvO5iVJVw8CkxSk9wk,2581
 agent_cli/server/whisper/backends/base.py,sha256=gQi5EyMCFS464mKXGIKbh1vgtBm99eNkf93SCIYRYg0,2597
 agent_cli/server/whisper/backends/faster_whisper.py,sha256=GN51L-qBjH-YU8ASiu317NrkMKMsK_znXDOTxi90EzU,6966
-agent_cli/server/whisper/backends/mlx.py,sha256=
+agent_cli/server/whisper/backends/mlx.py,sha256=5wHIvGDanA4-D_HUQSDJQS2DeuTg2x59z4hNAx-P0pg,9698
+agent_cli/server/whisper/backends/transformers.py,sha256=4t3gj2AcEqQY3w3ZlzVke10XCVbJk2vPeaGxxNP8tv0,10067
 agent_cli/services/__init__.py,sha256=5FyGCOS2Zpx4e2QWi1ppg9zm0hl8UU4J_id_g8LqGh4,11305
 agent_cli/services/_wyoming_utils.py,sha256=pKPa4fOSdqcG3-kNHJOHHsMnZ1yZJZi6XohVwjAwabo,1971
 agent_cli/services/asr.py,sha256=aRaCLVCygsJ15qyQEPECOZsdSrnlLPbyY4RwAqY0qIw,17258
 agent_cli/services/llm.py,sha256=i01utl1eYWlM13gvW2eR6ErL_ndH-g0d-BSleZra_7k,7229
 agent_cli/services/tts.py,sha256=NX5Qnq7ddLI3mwm3nzhbR3zB1Os4Ip4sSVSjDZDTBcI,14855
 agent_cli/services/wake_word.py,sha256=JFJ1SA22H4yko9DXiQ1t7fcoxeALLAe3iBrLs0z8rX4,4664
-agent_cli-0.
-agent_cli-0.
-agent_cli-0.
-agent_cli-0.
-agent_cli-0.
+agent_cli-0.75.0.dist-info/METADATA,sha256=3xYOYaJno_r35V1RGawNfVhMrgwkxj94LckVUjbJMV0,181041
+agent_cli-0.75.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
+agent_cli-0.75.0.dist-info/entry_points.txt,sha256=FUv-fB2atLsPUk_RT4zqnZl1coz4_XHFwRALOKOF38s,97
+agent_cli-0.75.0.dist-info/licenses/LICENSE,sha256=majJU6S9kC8R8bW39NVBHyv32Dq50FL6TDxECG2WVts,1068
+agent_cli-0.75.0.dist-info/RECORD,,
{agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/WHEEL
File without changes
{agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/entry_points.txt
File without changes
{agent_cli-0.74.0.dist-info → agent_cli-0.75.0.dist-info}/licenses/LICENSE
File without changes