agent-cli 0.70.5__py3-none-any.whl → 0.72.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,12 +23,12 @@ from agent_cli.core.utils import (
23
23
  def rag_proxy(
24
24
  docs_folder: Path = typer.Option( # noqa: B008
25
25
  "./rag_docs",
26
- help="Folder to watch for documents",
26
+ help="Folder to watch for documents. Files are auto-indexed on startup and when changed. Must not overlap with `--chroma-path`.",
27
27
  rich_help_panel="RAG Configuration",
28
28
  ),
29
29
  chroma_path: Path = typer.Option( # noqa: B008
30
30
  "./rag_db",
31
- help="Path to ChromaDB persistence directory",
31
+ help="ChromaDB storage directory for vector embeddings. Must be separate from `--docs-folder` to avoid indexing database files.",
32
32
  rich_help_panel="RAG Configuration",
33
33
  ),
34
34
  openai_base_url: str | None = opts.OPENAI_BASE_URL,
@@ -36,30 +36,62 @@ def rag_proxy(
36
36
  openai_api_key: str | None = opts.OPENAI_API_KEY,
37
37
  limit: int = typer.Option(
38
38
  3,
39
- help="Number of document chunks to retrieve per query.",
39
+ help="Number of document chunks to retrieve per query. Higher values provide more context but use more tokens. Can be overridden per-request via `rag_top_k` in the JSON body.",
40
40
  rich_help_panel="RAG Configuration",
41
41
  ),
42
42
  host: str = opts.SERVER_HOST,
43
43
  port: int = typer.Option(
44
44
  8000,
45
- help="Port to bind to",
45
+ help="Port for the RAG proxy API (e.g., `http://localhost:8000/v1/chat/completions`).",
46
46
  rich_help_panel="Server Configuration",
47
47
  ),
48
- log_level: opts.LogLevel = opts.LOG_LEVEL,
48
+ log_level: opts.LogLevel = opts.SERVER_LOG_LEVEL,
49
49
  config_file: str | None = opts.CONFIG_FILE,
50
50
  print_args: bool = opts.PRINT_ARGS,
51
51
  enable_rag_tools: bool = typer.Option(
52
52
  True, # noqa: FBT003
53
53
  "--rag-tools/--no-rag-tools",
54
- help="Allow agent to fetch full documents when snippets are insufficient.",
54
+ help="Enable `read_full_document()` tool so the LLM can request full document content when retrieved snippets are insufficient. Can be overridden per-request via `rag_enable_tools` in the JSON body.",
55
55
  rich_help_panel="RAG Configuration",
56
56
  ),
57
57
  ) -> None:
58
- """Start the RAG (Retrieval-Augmented Generation) Proxy Server.
58
+ """Start a RAG proxy server that enables "chat with your documents".
59
59
 
60
- This server watches a folder for documents, indexes them, and provides
61
- an OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp),
62
- injecting relevant context from the documents.
60
+ Watches a folder for documents, indexes them into a vector store, and provides an
61
+ OpenAI-compatible API at `/v1/chat/completions`. When you send a chat request,
62
+ the server retrieves relevant document chunks and injects them as context before
63
+ forwarding to your LLM backend.
64
+
65
+ **Quick start:**
66
+
67
+ - `agent-cli rag-proxy` — Start with defaults (./rag_docs, OpenAI-compatible API)
68
+ - `agent-cli rag-proxy --docs-folder ~/notes` — Index your notes folder
69
+
70
+ **How it works:**
71
+
72
+ 1. Documents in `--docs-folder` are chunked, embedded, and stored in ChromaDB
73
+ 2. A file watcher auto-reindexes when files change
74
+ 3. Chat requests trigger a semantic search for relevant chunks
75
+ 4. Retrieved context is injected into the prompt before forwarding to the LLM
76
+ 5. Responses include a `rag_sources` field listing which documents were used
77
+
78
+ **Supported file formats:**
79
+
80
+ Text: `.txt`, `.md`, `.json`, `.py`, `.js`, `.ts`, `.yaml`, `.toml`, `.rst`, etc.
81
+ Rich documents (via MarkItDown): `.pdf`, `.docx`, `.pptx`, `.xlsx`, `.html`, `.csv`
82
+
83
+ **API endpoints:**
84
+
85
+ - `POST /v1/chat/completions` — Main chat endpoint (OpenAI-compatible)
86
+ - `GET /health` — Health check with configuration info
87
+ - `GET /files` — List indexed files with chunk counts
88
+ - `POST /reindex` — Trigger manual reindex
89
+ - All other paths are proxied to the LLM backend
90
+
91
+ **Per-request overrides (in JSON body):**
92
+
93
+ - `rag_top_k`: Override `--limit` for this request
94
+ - `rag_enable_tools`: Override `--rag-tools` for this request
63
95
  """
64
96
  if print_args:
65
97
  print_command_line_args(locals())
agent_cli/agents/speak.py CHANGED
@@ -86,7 +86,7 @@ def speak(
86
86
  *,
87
87
  text: str | None = typer.Argument(
88
88
  None,
89
- help="Text to speak. Reads from clipboard if not provided.",
89
+ help="Text to synthesize. If not provided, reads from clipboard.",
90
90
  rich_help_panel="General Options",
91
91
  ),
92
92
  # --- Provider Selection ---
@@ -127,7 +127,27 @@ def speak(
127
127
  config_file: str | None = opts.CONFIG_FILE,
128
128
  print_args: bool = opts.PRINT_ARGS,
129
129
  ) -> None:
130
- """Convert text to speech using Wyoming or OpenAI-compatible TTS server."""
130
+ """Convert text to speech and play audio through speakers.
131
+
132
+ By default, synthesized audio plays immediately. Use `--save-file` to save
133
+ to a WAV file instead (skips playback).
134
+
135
+ Text can be provided as an argument or read from clipboard automatically.
136
+
137
+ **Examples:**
138
+
139
+ Speak text directly:
140
+ `agent-cli speak "Hello, world!"`
141
+
142
+ Speak clipboard contents:
143
+ `agent-cli speak`
144
+
145
+ Save to file instead of playing:
146
+ `agent-cli speak "Hello" --save-file greeting.wav`
147
+
148
+ Use OpenAI-compatible TTS:
149
+ `agent-cli speak "Hello" --tts-provider openai`
150
+ """
131
151
  if print_args:
132
152
  print_command_line_args(locals())
133
153
 
@@ -471,7 +471,7 @@ def transcribe( # noqa: PLR0912
471
471
  extra_instructions: str | None = typer.Option(
472
472
  None,
473
473
  "--extra-instructions",
474
- help="Additional instructions for the LLM to process the transcription.",
474
+ help="Extra instructions appended to the LLM cleanup prompt (requires `--llm`).",
475
475
  rich_help_panel="LLM Configuration",
476
476
  ),
477
477
  from_file: Path | None = opts.FROM_FILE,
@@ -513,7 +513,25 @@ def transcribe( # noqa: PLR0912
513
513
  print_args: bool = opts.PRINT_ARGS,
514
514
  transcription_log: Path | None = opts.TRANSCRIPTION_LOG,
515
515
  ) -> None:
516
- """Wyoming ASR Client for streaming microphone audio to a transcription server."""
516
+ """Record audio from microphone and transcribe to text.
517
+
518
+ Records until you press Ctrl+C (or send SIGINT), then transcribes using your
519
+ configured ASR provider. The transcript is copied to the clipboard by default.
520
+
521
+ **With `--llm`**: Passes the raw transcript through an LLM to clean up speech
522
+ recognition errors, add punctuation, remove filler words, and improve readability.
523
+
524
+ **With `--toggle`**: Bind to a hotkey for push-to-talk. First call starts recording,
525
+ second call stops and transcribes.
526
+
527
+ **Examples**:
528
+
529
+ - Record and transcribe: `agent-cli transcribe`
530
+
531
+ - With LLM cleanup: `agent-cli transcribe --llm`
532
+
533
+ - Re-transcribe last recording: `agent-cli transcribe --last-recording 1`
534
+ """
517
535
  if print_args:
518
536
  print_command_line_args(locals())
519
537
 
@@ -296,45 +296,45 @@ def transcribe_daemon( # noqa: PLR0912
296
296
  "user",
297
297
  "--role",
298
298
  "-r",
299
- help="Role name for logging (e.g., 'meeting', 'notes', 'user').",
299
+ help="Label for log entries. Use to distinguish speakers or contexts in logs.",
300
300
  ),
301
301
  silence_threshold: float = typer.Option(
302
302
  1.0,
303
303
  "--silence-threshold",
304
304
  "-s",
305
- help="Seconds of silence to end a speech segment.",
305
+ help="Seconds of silence after speech to finalize a segment. Increase for slower speakers.",
306
306
  ),
307
307
  min_segment: float = typer.Option(
308
308
  0.25,
309
309
  "--min-segment",
310
310
  "-m",
311
- help="Minimum speech duration in seconds to trigger a segment.",
311
+ help="Minimum seconds of speech required before a segment is processed. Filters brief sounds.",
312
312
  ),
313
313
  vad_threshold: float = typer.Option(
314
314
  0.3,
315
315
  "--vad-threshold",
316
- help="VAD speech detection threshold (0.0-1.0). Higher = more aggressive filtering.",
316
+ help="Silero VAD confidence threshold (0.0-1.0). Higher values require clearer speech; lower values are more sensitive to quiet/distant voices.",
317
317
  ),
318
318
  save_audio: bool = typer.Option(
319
319
  True, # noqa: FBT003
320
320
  "--save-audio/--no-save-audio",
321
- help="Save audio segments as MP3 files.",
321
+ help="Save each speech segment as MP3. Requires `ffmpeg` to be installed.",
322
322
  ),
323
323
  audio_dir: Path | None = typer.Option( # noqa: B008
324
324
  None,
325
325
  "--audio-dir",
326
- help="Directory for MP3 files. Default: ~/.config/agent-cli/audio",
326
+ help="Base directory for MP3 files. Files are organized by date: `YYYY/MM/DD/HHMMSS_mmm.mp3`. Default: `~/.config/agent-cli/audio`.",
327
327
  ),
328
328
  transcription_log: Path | None = typer.Option( # noqa: B008
329
329
  None,
330
330
  "--transcription-log",
331
331
  "-t",
332
- help="JSON Lines log file path. Default: ~/.config/agent-cli/transcriptions.jsonl",
332
+ help="JSONL file for transcript logging (one JSON object per line with timestamp, role, raw/processed text, audio path). Default: `~/.config/agent-cli/transcriptions.jsonl`.",
333
333
  ),
334
334
  clipboard: bool = typer.Option(
335
335
  False, # noqa: FBT003
336
336
  "--clipboard/--no-clipboard",
337
- help="Copy each transcription to clipboard.",
337
+ help="Copy each completed transcription to clipboard (overwrites previous). Useful with `--llm` to get cleaned text.",
338
338
  ),
339
339
  # --- Provider Selection ---
340
340
  asr_provider: str = opts.ASR_PROVIDER,
@@ -368,25 +368,37 @@ def transcribe_daemon( # noqa: PLR0912
368
368
  config_file: str | None = opts.CONFIG_FILE,
369
369
  print_args: bool = opts.PRINT_ARGS,
370
370
  ) -> None:
371
- """Run a continuous transcription daemon with voice activity detection.
371
+ """Continuous transcription daemon using Silero VAD for speech detection.
372
372
 
373
- This command runs indefinitely, capturing audio from your microphone,
374
- detecting speech segments using Silero VAD, transcribing them, and
375
- logging results with timestamps.
373
+ Unlike `transcribe` (single recording session), this daemon runs indefinitely
374
+ and automatically detects speech segments using Voice Activity Detection (VAD).
375
+ Each detected segment is transcribed and logged with timestamps.
376
376
 
377
- Examples:
378
- # Basic daemon
379
- agent-cli transcribe-daemon
377
+ **How it works:**
380
378
 
381
- # With role and custom silence threshold
382
- agent-cli transcribe-daemon --role meeting --silence-threshold 1.5
379
+ 1. Listens continuously to microphone input
380
+ 2. Silero VAD detects when you start/stop speaking
381
+ 3. After `--silence-threshold` seconds of silence, the segment is finalized
382
+ 4. Segment is transcribed (and optionally cleaned by LLM with `--llm`)
383
+ 5. Results are appended to the JSONL log file
384
+ 6. Audio is saved as MP3 if `--save-audio` is enabled (requires `ffmpeg`)
385
+
386
+ **Use cases:** Meeting transcription, note-taking, voice journaling, accessibility.
383
387
 
384
- # With LLM cleanup
385
- agent-cli transcribe-daemon --llm --role notes
388
+ **Examples:**
389
+
390
+ agent-cli transcribe-daemon
391
+ agent-cli transcribe-daemon --role meeting --silence-threshold 1.5
392
+ agent-cli transcribe-daemon --llm --clipboard --role notes
393
+ agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --no-save-audio
394
+ agent-cli transcribe-daemon --asr-provider openai --llm-provider gemini --llm
386
395
 
387
- # Custom log file and audio directory
388
- agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir ~/audio
396
+ **Tips:**
389
397
 
398
+ - Use `--role` to tag entries (e.g., `speaker1`, `meeting`, `personal`)
399
+ - Adjust `--vad-threshold` if detection is too sensitive (increase) or missing speech (decrease)
400
+ - Use `--stop` to cleanly terminate a running daemon
401
+ - With `--llm`, transcripts are cleaned up (punctuation, filler words removed)
390
402
  """
391
403
  if print_args:
392
404
  print_command_line_args(locals())
@@ -229,15 +229,23 @@ def voice_edit(
229
229
  config_file: str | None = opts.CONFIG_FILE,
230
230
  print_args: bool = opts.PRINT_ARGS,
231
231
  ) -> None:
232
- """Interact with clipboard text via a voice command using local or remote services.
233
-
234
- Usage:
235
- - Run in foreground: agent-cli voice-edit --input-device-index 1
236
- - Run in background: agent-cli voice-edit --input-device-index 1 &
237
- - Check status: agent-cli voice-edit --status
238
- - Stop background process: agent-cli voice-edit --stop
239
- - List output devices: agent-cli voice-edit --list-output-devices
240
- - Save TTS to file: agent-cli voice-edit --tts --save-file response.wav
232
+ """Edit or query clipboard text using voice commands.
233
+
234
+ **Workflow:** Captures clipboard text → records your voice command → transcribes
235
+ it → sends both to an LLM → copies result back to clipboard.
236
+
237
+ Use this for hands-free text editing (e.g., "make this more formal") or
238
+ asking questions about clipboard content (e.g., "summarize this").
239
+
240
+ **Typical hotkey integration:** Run `voice-edit &` on keypress to start
241
+ recording, then send SIGINT (via `--stop`) on second keypress to process.
242
+
243
+ **Examples:**
244
+
245
+ - Basic usage: `agent-cli voice-edit`
246
+ - With TTS response: `agent-cli voice-edit --tts`
247
+ - Toggle on/off: `agent-cli voice-edit --toggle`
248
+ - List audio devices: `agent-cli voice-edit --list-devices`
241
249
  """
242
250
  if print_args:
243
251
  print_command_line_args(locals())
agent_cli/cli.py CHANGED
@@ -14,9 +14,32 @@ from .config import load_config, normalize_provider_defaults
14
14
  from .core.process import set_process_title
15
15
  from .core.utils import console
16
16
 
17
+ _HELP = """\
18
+ AI-powered voice, text, and development tools.
19
+
20
+ **Voice & Text:**
21
+
22
+ - **Voice-to-text** - Transcribe speech with optional LLM cleanup
23
+ - **Text-to-speech** - Convert text to natural-sounding audio
24
+ - **Voice chat** - Conversational AI with memory and tool use
25
+ - **Text correction** - Fix grammar, spelling, and punctuation
26
+
27
+ **Development:**
28
+
29
+ - **Parallel development** - Git worktrees with integrated coding agents
30
+ - **Local servers** - ASR/TTS with Wyoming + OpenAI-compatible APIs,
31
+ MLX on macOS ARM, CUDA/CPU Whisper, and automatic model TTL
32
+
33
+ **Provider Flexibility:**
34
+
35
+ Mix local (Ollama, Wyoming) and cloud (OpenAI, Gemini) backends freely.
36
+
37
+ Run `agent-cli <command> --help` for detailed command documentation.
38
+ """
39
+
17
40
  app = typer.Typer(
18
41
  name="agent-cli",
19
- help="A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.",
42
+ help=_HELP,
20
43
  context_settings={"help_option_names": ["-h", "--help"]},
21
44
  add_completion=True,
22
45
  rich_markup_mode="markdown",
@@ -56,7 +79,7 @@ def main(
56
79
  ),
57
80
  ] = False,
58
81
  ) -> None:
59
- """A suite of AI-powered tools."""
82
+ """AI-powered voice, text, and development tools."""
60
83
  if ctx.invoked_subcommand is None:
61
84
  console.print("[bold red]No command specified.[/bold red]")
62
85
  console.print("[bold yellow]Running --help for your convenience.[/bold yellow]")
agent_cli/config_cmd.py CHANGED
@@ -20,7 +20,17 @@ from agent_cli.core.utils import console
20
20
 
21
21
  config_app = typer.Typer(
22
22
  name="config",
23
- help="Manage agent-cli configuration files.",
23
+ help="""Manage agent-cli configuration files.
24
+
25
+ Config files are TOML format and searched in order:
26
+
27
+ 1. `./agent-cli-config.toml` (project-local)
28
+ 2. `~/.config/agent-cli/config.toml` (user default)
29
+
30
+ Settings in `[defaults]` apply to all commands. Override per-command
31
+ with sections like `[chat]` or `[transcribe]`. CLI arguments override
32
+ config file settings.
33
+ """,
24
34
  add_completion=True,
25
35
  rich_markup_mode="markdown",
26
36
  no_args_is_help=True,
@@ -40,30 +50,30 @@ CONFIG_PATH_OPTION: Path | None = typer.Option(
40
50
  None,
41
51
  "--path",
42
52
  "-p",
43
- help="Path to config file. Uses auto-detection if not specified.",
53
+ help="Override auto-detection and use this config file path.",
44
54
  )
45
55
  CONFIG_PATH_INIT_OPTION: Path | None = typer.Option(
46
56
  None,
47
57
  "--path",
48
58
  "-p",
49
- help="Custom path for config file. Default: ~/.config/agent-cli/config.toml",
59
+ help="Where to create the config file (default: `~/.config/agent-cli/config.toml`).",
50
60
  )
51
61
  FORCE_OPTION: bool = typer.Option(
52
62
  False, # noqa: FBT003
53
63
  "--force",
54
64
  "-f",
55
- help="Overwrite existing config without confirmation.",
65
+ help="Overwrite existing config without prompting for confirmation.",
56
66
  )
57
67
  RAW_OPTION: bool = typer.Option(
58
68
  False, # noqa: FBT003
59
69
  "--raw",
60
70
  "-r",
61
- help="Output raw file contents (for copy-paste).",
71
+ help="Print plain file contents without syntax highlighting or line numbers.",
62
72
  )
63
73
  JSON_OPTION: bool = typer.Option(
64
74
  False, # noqa: FBT003
65
75
  "--json",
66
- help="Output as JSON for automation.",
76
+ help="Output as JSON with `path`, `exists`, and `content` fields.",
67
77
  )
68
78
 
69
79
 
@@ -149,10 +159,13 @@ def config_init(
149
159
  path: Path | None = CONFIG_PATH_INIT_OPTION,
150
160
  force: bool = FORCE_OPTION,
151
161
  ) -> None:
152
- """Create a new config file with all options commented out.
162
+ """Create a new config file with all options as commented-out examples.
153
163
 
154
- The generated config file serves as a template showing all available
155
- options. Uncomment and modify the options you want to customize.
164
+ Generates a TOML template with `[defaults]` for global settings and
165
+ command-specific sections like `[chat]`, `[transcribe]`, etc. Uncomment
166
+ and edit the options you want to customize.
167
+
168
+ Example: `agent-cli config init && agent-cli config edit`
156
169
  """
157
170
  target_path = _get_config_file(path) or USER_CONFIG_PATH
158
171
 
@@ -182,7 +195,9 @@ def config_edit(
182
195
  ) -> None:
183
196
  """Open the config file in your default editor.
184
197
 
185
- The editor is determined by: $EDITOR > $VISUAL > platform default.
198
+ Editor preference: `$EDITOR` → `$VISUAL` → `nano`/`vim` → `vi` (or
199
+ `notepad` on Windows). If no config exists, run `agent-cli config init`
200
+ first.
186
201
  """
187
202
  config_file = _get_config_file(path)
188
203
 
@@ -234,7 +249,11 @@ def config_show(
234
249
  raw: bool = RAW_OPTION,
235
250
  json_output: bool = JSON_OPTION,
236
251
  ) -> None:
237
- """Display the config file location and contents."""
252
+ """Display the active config file path and contents.
253
+
254
+ By default, shows syntax-highlighted TOML with line numbers. Use `--raw`
255
+ for plain output (useful for piping), or `--json` for programmatic access.
256
+ """
238
257
  config_file = _get_config_file(path)
239
258
 
240
259
  if config_file is None:
agent_cli/core/deps.py CHANGED
@@ -3,6 +3,7 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import functools
6
+ import importlib
6
7
  import json
7
8
  import os
8
9
  from importlib.util import find_spec
@@ -12,7 +13,7 @@ from typing import TYPE_CHECKING, TypeVar
12
13
  import typer
13
14
 
14
15
  from agent_cli.config import load_config
15
- from agent_cli.core.utils import console, print_error_message
16
+ from agent_cli.core.utils import err_console, print_error_message
16
17
 
17
18
  if TYPE_CHECKING:
18
19
  from collections.abc import Callable
@@ -139,7 +140,7 @@ def _try_auto_install(missing: list[str]) -> bool:
139
140
  else:
140
141
  extras_to_install.append(extra)
141
142
 
142
- console.print(
143
+ err_console.print(
143
144
  f"[yellow]Auto-installing missing extras: {', '.join(extras_to_install)}[/]",
144
145
  )
145
146
  return install_extras_programmatic(extras_to_install, quiet=True)
@@ -159,7 +160,9 @@ def _check_and_install_extras(extras: tuple[str, ...]) -> list[str]:
159
160
  print_error_message("Auto-install failed.\n" + get_combined_install_hint(missing))
160
161
  return missing
161
162
 
162
- console.print("[green]Installation complete![/]")
163
+ err_console.print("[green]Installation complete![/]")
164
+ # Invalidate import caches so find_spec() can see newly installed packages
165
+ importlib.invalidate_caches()
163
166
  still_missing = [e for e in extras if not check_extra_installed(e)]
164
167
  if still_missing:
165
168
  print_error_message(
agent_cli/core/vad.py CHANGED
@@ -3,38 +3,22 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  import logging
6
- import urllib.request
7
6
  from collections import deque
8
- from pathlib import Path
9
7
 
10
8
  from agent_cli import constants
11
9
 
12
10
  try:
13
11
  import numpy as np
14
- import torch
12
+ from silero_vad_lite import SileroVAD
15
13
  except ImportError as e:
16
14
  msg = (
17
- "silero-vad is required for the transcribe-daemon command. "
15
+ "silero-vad-lite is required for the transcribe-daemon command. "
18
16
  "Install it with: `pip install agent-cli[vad]` or `uv sync --extra vad`."
19
17
  )
20
18
  raise ImportError(msg) from e
21
19
 
22
20
  LOGGER = logging.getLogger(__name__)
23
21
 
24
- _SILERO_VAD_ONNX_URL = (
25
- "https://github.com/snakers4/silero-vad/raw/master/src/silero_vad/data/silero_vad.onnx"
26
- )
27
-
28
-
29
- def _get_model_path() -> Path:
30
- """Get the path to the Silero VAD ONNX model, downloading if needed."""
31
- cache_dir = Path.home() / ".cache" / "silero-vad"
32
- cache_dir.mkdir(parents=True, exist_ok=True)
33
- model_path = cache_dir / "silero_vad.onnx"
34
- if not model_path.exists():
35
- urllib.request.urlretrieve(_SILERO_VAD_ONNX_URL, model_path) # noqa: S310
36
- return model_path
37
-
38
22
 
39
23
  class VoiceActivityDetector:
40
24
  """Silero VAD-based voice activity detection for audio segmentation.
@@ -56,8 +40,6 @@ class VoiceActivityDetector:
56
40
  msg = f"Sample rate must be 8000 or 16000, got {sample_rate}"
57
41
  raise ValueError(msg)
58
42
 
59
- from silero_vad.utils_vad import OnnxWrapper # noqa: PLC0415
60
-
61
43
  self.sample_rate = sample_rate
62
44
  self.threshold = threshold
63
45
  self.silence_threshold_ms = silence_threshold_ms
@@ -74,7 +56,7 @@ class VoiceActivityDetector:
74
56
  )
75
57
 
76
58
  # Model and state
77
- self._model = OnnxWrapper(str(_get_model_path()))
59
+ self._model = SileroVAD(sample_rate=sample_rate)
78
60
  self._pre_speech_buffer: deque[bytes] = deque(maxlen=pre_speech_windows)
79
61
  self._pending = bytearray()
80
62
  self._audio_buffer = bytearray()
@@ -92,7 +74,7 @@ class VoiceActivityDetector:
92
74
 
93
75
  def reset(self) -> None:
94
76
  """Reset VAD state for a new recording session."""
95
- self._model.reset_states()
77
+ self._model = SileroVAD(sample_rate=self.sample_rate)
96
78
  self._pre_speech_buffer.clear()
97
79
  self._pending.clear()
98
80
  self._audio_buffer.clear()
@@ -103,7 +85,7 @@ class VoiceActivityDetector:
103
85
  def _is_speech(self, window: bytes) -> bool:
104
86
  """Check if audio window contains speech."""
105
87
  audio = np.frombuffer(window, dtype=np.int16).astype(np.float32) / 32768.0
106
- prob = float(self._model(torch.from_numpy(audio), self.sample_rate).item())
88
+ prob = self._model.process(audio)
107
89
  LOGGER.debug("Speech prob: %.3f, threshold: %.2f", prob, self.threshold)
108
90
  return prob >= self.threshold
109
91
 
@@ -154,7 +136,7 @@ class VoiceActivityDetector:
154
136
  self._silence_samples = 0
155
137
  self._speech_samples = 0
156
138
  self._audio_buffer.clear()
157
- self._model.reset_states()
139
+ self._model = SileroVAD(sample_rate=self.sample_rate)
158
140
  else:
159
141
  # Not speaking - maintain rolling pre-speech buffer (auto-limited by deque maxlen)
160
142
  self._pre_speech_buffer.append(window)