PyPI - agent-cli - Versions diffs - 0.70.5__py3-none-any.whl → 0.71.0__py3-none-any.whl - Mend

agent-cli 0.70.5__py3-none-any.whl → 0.71.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (25)

agent_cli/agents/assistant.py +23 -27
agent_cli/agents/autocorrect.py +29 -3
agent_cli/agents/chat.py +44 -14
agent_cli/agents/memory/__init__.py +19 -1
agent_cli/agents/memory/add.py +3 -3
agent_cli/agents/memory/proxy.py +19 -10
agent_cli/agents/rag_proxy.py +41 -9
agent_cli/agents/speak.py +22 -2
agent_cli/agents/transcribe.py +20 -2
agent_cli/agents/transcribe_daemon.py +33 -21
agent_cli/agents/voice_edit.py +17 -9
agent_cli/cli.py +25 -2
agent_cli/config_cmd.py +30 -11
agent_cli/dev/cli.py +295 -65
agent_cli/docs_gen.py +18 -8
agent_cli/install/extras.py +39 -10
agent_cli/install/hotkeys.py +22 -11
agent_cli/install/services.py +54 -14
agent_cli/opts.py +23 -20
agent_cli/server/cli.py +118 -44
{agent_cli-0.70.5.dist-info → agent_cli-0.71.0.dist-info}/METADATA +456 -187
{agent_cli-0.70.5.dist-info → agent_cli-0.71.0.dist-info}/RECORD +25 -25
{agent_cli-0.70.5.dist-info → agent_cli-0.71.0.dist-info}/WHEEL +0 -0
{agent_cli-0.70.5.dist-info → agent_cli-0.71.0.dist-info}/entry_points.txt +0 -0
{agent_cli-0.70.5.dist-info → agent_cli-0.71.0.dist-info}/licenses/LICENSE +0 -0

agent_cli/agents/assistant.py CHANGED Viewed

@@ -1,29 +1,4 @@
-r"""Wake word-based voice assistant that records when wake word is detected.
-This agent uses Wyoming wake word detection to implement a hands-free voice assistant that:
-1. Continuously listens for a wake word
-2. When the wake word is detected, starts recording user speech
-3. When the wake word is detected again, stops recording and processes the speech
-4. Sends the recorded speech to ASR for transcription
-5. Optionally processes the transcript with an LLM and speaks the response
-WORKFLOW:
-1. Agent starts listening for the specified wake word
-2. First wake word detection -> start recording user speech
-3. Second wake word detection -> stop recording and process the speech
-4. Transcribe the recorded speech using Wyoming ASR
-5. Optionally process with LLM and respond with TTS
-USAGE:
-- Start the agent: assistant --wake-word "ok_nabu" --input-device-index 1
-- The agent runs continuously until stopped with Ctrl+C or --stop
-- Uses background process management for daemon-like operation
-REQUIREMENTS:
-- Wyoming wake word server (e.g., wyoming-openwakeword)
-- Wyoming ASR server (e.g., wyoming-whisper)
-- Optional: Wyoming TTS server for responses
-"""
+"""Wake word-based voice assistant using Wyoming protocol services."""
 from __future__ import annotations
@@ -313,7 +288,28 @@ def assistant(
     config_file: str | None = opts.CONFIG_FILE,
     print_args: bool = opts.PRINT_ARGS,
 ) -> None:
-    """Wake word-based voice assistant using local or remote services."""
+    """Hands-free voice assistant using wake word detection.
+    Continuously listens for a wake word, then records your speech until you say
+    the wake word again. The recording is transcribed and sent to an LLM for a
+    conversational response, optionally spoken back via TTS.
+    **Conversation flow:**
+      1. Say wake word → starts recording
+      2. Speak your question/command
+      3. Say wake word again → stops recording and processes
+    The assistant runs in a loop, ready for the next command after each response.
+    Stop with Ctrl+C or `--stop`.
+    **Requirements:**
+      - Wyoming wake word server (e.g., wyoming-openwakeword on port 10400)
+      - Wyoming ASR server (e.g., wyoming-whisper on port 10300)
+      - Optional: TTS server for spoken responses (enable with `--tts`)
+    **Example:**
+      `assistant --wake-word ok_nabu --tts --input-device-name USB`
+    """
     if print_args:
         print_command_line_args(locals())

agent_cli/agents/autocorrect.py CHANGED Viewed

@@ -1,4 +1,4 @@
-"""Read text from clipboard, correct it using a local or remote LLM, and write the result back to the clipboard."""
+"""Fix grammar, spelling, and punctuation in text using an LLM."""
 from __future__ import annotations
@@ -216,7 +216,7 @@ def autocorrect(
     *,
     text: str | None = typer.Argument(
         None,
-        help="The text to correct. If not provided, reads from clipboard.",
+        help="Text to correct. If omitted, reads from system clipboard.",
         rich_help_panel="General Options",
     ),
     # --- Provider Selection ---
@@ -240,7 +240,33 @@ def autocorrect(
     config_file: str | None = opts.CONFIG_FILE,
     print_args: bool = opts.PRINT_ARGS,
 ) -> None:
-    """Correct text from clipboard using a local or remote LLM."""
+    """Fix grammar, spelling, and punctuation using an LLM.
+    Reads text from clipboard (or argument), sends to LLM for correction,
+    and copies the result back to clipboard. Only makes technical corrections
+    without changing meaning or tone.
+    **Workflow:**
+    1. Read text from clipboard (or `TEXT` argument)
+    2. Send to LLM for grammar/spelling/punctuation fixes
+    3. Copy corrected text to clipboard (unless `--json`)
+    4. Display result
+    **Examples:**
+    ```bash
+    # Correct text from clipboard (default)
+    agent-cli autocorrect
+    # Correct specific text
+    agent-cli autocorrect "this is incorect"
+    # Use OpenAI instead of local Ollama
+    agent-cli autocorrect --llm-provider openai
+    # Get JSON output for scripting (disables clipboard)
+    agent-cli autocorrect --json
+    ```
+    """
     if print_args:
         print_command_line_args(locals())

agent_cli/agents/chat.py CHANGED Viewed

@@ -1,13 +1,15 @@
-"""An chat agent that you can talk to.
-This agent will:
-- Listen for your voice command.
-- Transcribe the command.
-- Send the transcription to an LLM.
-- Speak the LLM's response.
-- Remember the conversation history.
-- Attach timestamps to the saved conversation.
-- Format timestamps as "ago" when sending to the LLM.
+"""Voice-based conversational chat agent with memory and tools.
+Runs an interactive voice loop: listens for speech, transcribes it,
+sends to the LLM (with conversation context), and optionally speaks the response.
+**Available tools** (automatically used by the LLM when relevant):
+- `add_memory`/`search_memory`/`update_memory` - persistent long-term memory
+- `duckduckgo_search` - web search for current information
+- `read_file`/`execute_code` - file access and shell commands
+**Process management**: Use `--toggle` to start/stop via hotkey, `--stop` to terminate,
+or `--status` to check if running. Useful for binding to a keyboard shortcut.
 """
 from __future__ import annotations
@@ -425,14 +427,15 @@ def chat(
     history_dir: Path = typer.Option(  # noqa: B008
         "~/.config/agent-cli/history",
         "--history-dir",
-        help="Directory to store conversation history.",
+        help="Directory for conversation history and long-term memory. "
+        "Both `conversation.json` and `long_term_memory.json` are stored here.",
         rich_help_panel="History Options",
     ),
     last_n_messages: int = typer.Option(
         50,
         "--last-n-messages",
-        help="Number of messages to include in the conversation history."
-        " Set to 0 to disable history.",
+        help="Number of past messages to include as context for the LLM. "
+        "Set to 0 to start fresh each session (memory tools still persist).",
         rich_help_panel="History Options",
     ),
     # --- General Options ---
@@ -444,7 +447,34 @@ def chat(
     config_file: str | None = opts.CONFIG_FILE,
     print_args: bool = opts.PRINT_ARGS,
 ) -> None:
-    """An chat agent that you can talk to."""
+    """Voice-based conversational chat agent with memory and tools.
+    Runs an interactive loop: listen → transcribe → LLM → speak response.
+    Conversation history is persisted and included as context for continuity.
+    **Built-in tools** (LLM uses automatically when relevant):
+    - `add_memory`/`search_memory`/`update_memory` - persistent long-term memory
+    - `duckduckgo_search` - web search for current information
+    - `read_file`/`execute_code` - file access and shell commands
+    **Process management**: Use `--toggle` to start/stop via hotkey (bind to
+    a keyboard shortcut), `--stop` to terminate, or `--status` to check state.
+    **Examples**:
+    Use OpenAI-compatible providers for speech and LLM, with TTS enabled:
+        agent-cli chat --asr-provider openai --llm-provider openai --tts
+    Start in background mode (toggle on/off with hotkey):
+        agent-cli chat --toggle
+    Use local Ollama LLM with Wyoming ASR:
+        agent-cli chat --llm-provider ollama
+    """
     if print_args:
         print_command_line_args(locals())

agent_cli/agents/memory/__init__.py CHANGED Viewed

@@ -9,7 +9,25 @@ from agent_cli.core.process import set_process_title
 memory_app = typer.Typer(
     name="memory",
-    help="Memory system operations (add, proxy, etc.).",
+    help="""Long-term memory system for AI chat applications.
+Provides persistent memory across conversations by storing facts and context
+in Markdown files, with automatic vector indexing for semantic retrieval.
+**Subcommands:**
+- `proxy`: Start an OpenAI-compatible proxy that injects relevant memories
+  into chat requests and extracts new facts from responses
+- `add`: Manually add facts/memories without going through LLM extraction
+**Quick Start:**
+    # Start the memory proxy (point your chat client at localhost:8100)
+    agent-cli memory proxy --openai-base-url http://localhost:11434/v1
+    # Manually seed some memories
+    agent-cli memory add "User prefers dark mode" "User is a Python developer"
+""",
     add_completion=True,
     rich_markup_mode="markdown",
     no_args_is_help=True,

agent_cli/agents/memory/add.py CHANGED Viewed

@@ -127,17 +127,17 @@ def add(
         "default",
         "--conversation-id",
         "-c",
-        help="Conversation ID to add memories to.",
+        help="Conversation namespace for these memories. Memories are retrieved per-conversation unless shared globally.",
     ),
     memory_path: Path = typer.Option(  # noqa: B008
         "./memory_db",
         "--memory-path",
-        help="Path to the memory store.",
+        help="Directory for memory storage (same as `memory proxy --memory-path`).",
     ),
     git_versioning: bool = typer.Option(
         True,  # noqa: FBT003
         "--git-versioning/--no-git-versioning",
-        help="Commit changes to git.",
+        help="Auto-commit changes to git for version history.",
     ),
     quiet: bool = opts.QUIET,
     config_file: str | None = opts.CONFIG_FILE,

agent_cli/agents/memory/proxy.py CHANGED Viewed

@@ -19,7 +19,7 @@ from agent_cli.core.utils import console, print_command_line_args
 def proxy(
     memory_path: Path = typer.Option(  # noqa: B008
         "./memory_db",
-        help="Path to the memory store (files + derived vector index).",
+        help="Directory for memory storage. Contains `entries/` (Markdown files) and `chroma/` (vector index). Created automatically if it doesn't exist.",
         rich_help_panel="Memory Configuration",
     ),
     openai_base_url: str | None = opts.OPENAI_BASE_URL,
@@ -27,7 +27,7 @@ def proxy(
     openai_api_key: str | None = opts.OPENAI_API_KEY,
     default_top_k: int = typer.Option(
         5,
-        help="Number of memory entries to retrieve per query.",
+        help="Number of relevant memories to inject into each request. Higher values provide more context but increase token usage.",
         rich_help_panel="Memory Configuration",
     ),
     host: str = opts.SERVER_HOST,
@@ -38,7 +38,7 @@ def proxy(
     ),
     max_entries: int = typer.Option(
         500,
-        help="Maximum stored memory entries per conversation (excluding summary).",
+        help="Maximum entries per conversation before oldest are evicted. Summaries are preserved separately.",
         rich_help_panel="Memory Configuration",
     ),
     mmr_lambda: float = typer.Option(
@@ -48,7 +48,7 @@ def proxy(
     ),
     recency_weight: float = typer.Option(
         0.2,
-        help="Recency score weight (0.0-1.0). Controls freshness vs. relevance. Default 0.2 (20% recency, 80% semantic relevance).",
+        help="Weight for recency vs semantic relevance (0.0-1.0). At 0.2: 20% recency, 80% semantic similarity.",
         rich_help_panel="Memory Configuration",
     ),
     score_threshold: float = typer.Option(
@@ -59,13 +59,13 @@ def proxy(
     summarization: bool = typer.Option(
         True,  # noqa: FBT003
         "--summarization/--no-summarization",
-        help="Enable automatic fact extraction and summaries.",
+        help="Extract facts and generate summaries after each turn using the LLM. Disable to only store raw conversation turns.",
         rich_help_panel="Memory Configuration",
     ),
     git_versioning: bool = typer.Option(
         True,  # noqa: FBT003
         "--git-versioning/--no-git-versioning",
-        help="Enable automatic git commit of memory changes.",
+        help="Auto-commit memory changes to git. Initializes a repo in `--memory-path` if needed. Provides full history of memory evolution.",
         rich_help_panel="Memory Configuration",
     ),
     log_level: opts.LogLevel = opts.LOG_LEVEL,
@@ -78,7 +78,7 @@ def proxy(
     CLI, or IDE plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI,
     Ollama, vLLM).
-    Key Features:
+    **Key Features:**
     - **Simple Markdown Files:** Memories are stored as human-readable Markdown
       files, serving as the ultimate source of truth.
@@ -89,7 +89,7 @@ def proxy(
     - **Proxy Middleware:** Works transparently with any OpenAI-compatible
       `/chat/completions` endpoint.
-    How it works:
+    **How it works:**
     1.  Intercepts `POST /v1/chat/completions` requests.
     2.  **Retrieves** relevant memories (facts, previous conversations) from a
@@ -99,8 +99,17 @@ def proxy(
     5.  **Extracts** new facts from the conversation in the background and
         updates the long-term memory store (including handling contradictions).
-    Use this to give "long-term memory" to any OpenAI-compatible application.
-    Point your client's base URL to `http://localhost:8100/v1`.
+    **Example:**
+        # Start proxy pointing to local Ollama
+        agent-cli memory proxy --openai-base-url http://localhost:11434/v1
+        # Then configure your chat client to use http://localhost:8100/v1
+        # as its OpenAI base URL. All requests flow through the memory proxy.
+    **Per-request overrides:** Clients can include these fields in the request
+    body: `memory_id` (conversation ID), `memory_top_k`, `memory_recency_weight`,
+    `memory_score_threshold`.
     """
     if print_args:
         print_command_line_args(locals())

agent_cli/agents/rag_proxy.py CHANGED Viewed

@@ -23,12 +23,12 @@ from agent_cli.core.utils import (
 def rag_proxy(
     docs_folder: Path = typer.Option(  # noqa: B008
         "./rag_docs",
-        help="Folder to watch for documents",
+        help="Folder to watch for documents. Files are auto-indexed on startup and when changed. Must not overlap with `--chroma-path`.",
         rich_help_panel="RAG Configuration",
     ),
     chroma_path: Path = typer.Option(  # noqa: B008
         "./rag_db",
-        help="Path to ChromaDB persistence directory",
+        help="ChromaDB storage directory for vector embeddings. Must be separate from `--docs-folder` to avoid indexing database files.",
         rich_help_panel="RAG Configuration",
     ),
     openai_base_url: str | None = opts.OPENAI_BASE_URL,
@@ -36,13 +36,13 @@ def rag_proxy(
     openai_api_key: str | None = opts.OPENAI_API_KEY,
     limit: int = typer.Option(
         3,
-        help="Number of document chunks to retrieve per query.",
+        help="Number of document chunks to retrieve per query. Higher values provide more context but use more tokens. Can be overridden per-request via `rag_top_k` in the JSON body.",
         rich_help_panel="RAG Configuration",
     ),
     host: str = opts.SERVER_HOST,
     port: int = typer.Option(
         8000,
-        help="Port to bind to",
+        help="Port for the RAG proxy API (e.g., `http://localhost:8000/v1/chat/completions`).",
         rich_help_panel="Server Configuration",
     ),
     log_level: opts.LogLevel = opts.LOG_LEVEL,
@@ -51,15 +51,47 @@ def rag_proxy(
     enable_rag_tools: bool = typer.Option(
         True,  # noqa: FBT003
         "--rag-tools/--no-rag-tools",
-        help="Allow agent to fetch full documents when snippets are insufficient.",
+        help="Enable `read_full_document()` tool so the LLM can request full document content when retrieved snippets are insufficient. Can be overridden per-request via `rag_enable_tools` in the JSON body.",
         rich_help_panel="RAG Configuration",
     ),
 ) -> None:
-    """Start the RAG (Retrieval-Augmented Generation) Proxy Server.
+    """Start a RAG proxy server that enables "chat with your documents".
-    This server watches a folder for documents, indexes them, and provides
-    an OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp),
-    injecting relevant context from the documents.
+    Watches a folder for documents, indexes them into a vector store, and provides an
+    OpenAI-compatible API at `/v1/chat/completions`. When you send a chat request,
+    the server retrieves relevant document chunks and injects them as context before
+    forwarding to your LLM backend.
+    **Quick start:**
+    - `agent-cli rag-proxy` — Start with defaults (./rag_docs, OpenAI-compatible API)
+    - `agent-cli rag-proxy --docs-folder ~/notes` — Index your notes folder
+    **How it works:**
+    1. Documents in `--docs-folder` are chunked, embedded, and stored in ChromaDB
+    2. A file watcher auto-reindexes when files change
+    3. Chat requests trigger a semantic search for relevant chunks
+    4. Retrieved context is injected into the prompt before forwarding to the LLM
+    5. Responses include a `rag_sources` field listing which documents were used
+    **Supported file formats:**
+    Text: `.txt`, `.md`, `.json`, `.py`, `.js`, `.ts`, `.yaml`, `.toml`, `.rst`, etc.
+    Rich documents (via MarkItDown): `.pdf`, `.docx`, `.pptx`, `.xlsx`, `.html`, `.csv`
+    **API endpoints:**
+    - `POST /v1/chat/completions` — Main chat endpoint (OpenAI-compatible)
+    - `GET /health` — Health check with configuration info
+    - `GET /files` — List indexed files with chunk counts
+    - `POST /reindex` — Trigger manual reindex
+    - All other paths are proxied to the LLM backend
+    **Per-request overrides (in JSON body):**
+    - `rag_top_k`: Override `--limit` for this request
+    - `rag_enable_tools`: Override `--rag-tools` for this request
     """
     if print_args:
         print_command_line_args(locals())

agent_cli/agents/speak.py CHANGED Viewed

@@ -86,7 +86,7 @@ def speak(
     *,
     text: str | None = typer.Argument(
         None,
-        help="Text to speak. Reads from clipboard if not provided.",
+        help="Text to synthesize. If not provided, reads from clipboard.",
         rich_help_panel="General Options",
     ),
     # --- Provider Selection ---
@@ -127,7 +127,27 @@ def speak(
     config_file: str | None = opts.CONFIG_FILE,
     print_args: bool = opts.PRINT_ARGS,
 ) -> None:
-    """Convert text to speech using Wyoming or OpenAI-compatible TTS server."""
+    """Convert text to speech and play audio through speakers.
+    By default, synthesized audio plays immediately. Use `--save-file` to save
+    to a WAV file instead (skips playback).
+    Text can be provided as an argument or read from clipboard automatically.
+    **Examples:**
+    Speak text directly:
+        `agent-cli speak "Hello, world!"`
+    Speak clipboard contents:
+        `agent-cli speak`
+    Save to file instead of playing:
+        `agent-cli speak "Hello" --save-file greeting.wav`
+    Use OpenAI-compatible TTS:
+        `agent-cli speak "Hello" --tts-provider openai`
+    """
     if print_args:
         print_command_line_args(locals())

agent_cli/agents/transcribe.py CHANGED Viewed

@@ -471,7 +471,7 @@ def transcribe(  # noqa: PLR0912
     extra_instructions: str | None = typer.Option(
         None,
         "--extra-instructions",
-        help="Additional instructions for the LLM to process the transcription.",
+        help="Extra instructions appended to the LLM cleanup prompt (requires `--llm`).",
         rich_help_panel="LLM Configuration",
     ),
     from_file: Path | None = opts.FROM_FILE,
@@ -513,7 +513,25 @@ def transcribe(  # noqa: PLR0912
     print_args: bool = opts.PRINT_ARGS,
     transcription_log: Path | None = opts.TRANSCRIPTION_LOG,
 ) -> None:
-    """Wyoming ASR Client for streaming microphone audio to a transcription server."""
+    """Record audio from microphone and transcribe to text.
+    Records until you press Ctrl+C (or send SIGINT), then transcribes using your
+    configured ASR provider. The transcript is copied to the clipboard by default.
+    **With `--llm`**: Passes the raw transcript through an LLM to clean up speech
+    recognition errors, add punctuation, remove filler words, and improve readability.
+    **With `--toggle`**: Bind to a hotkey for push-to-talk. First call starts recording,
+    second call stops and transcribes.
+    **Examples**:
+    - Record and transcribe: `agent-cli transcribe`
+    - With LLM cleanup: `agent-cli transcribe --llm`
+    - Re-transcribe last recording: `agent-cli transcribe --last-recording 1`
+    """
     if print_args:
         print_command_line_args(locals())

agent_cli/agents/transcribe_daemon.py CHANGED Viewed

@@ -296,45 +296,45 @@ def transcribe_daemon(  # noqa: PLR0912
         "user",
         "--role",
         "-r",
-        help="Role name for logging (e.g., 'meeting', 'notes', 'user').",
+        help="Label for log entries. Use to distinguish speakers or contexts in logs.",
     ),
     silence_threshold: float = typer.Option(
         1.0,
         "--silence-threshold",
         "-s",
-        help="Seconds of silence to end a speech segment.",
+        help="Seconds of silence after speech to finalize a segment. Increase for slower speakers.",
     ),
     min_segment: float = typer.Option(
         0.25,
         "--min-segment",
         "-m",
-        help="Minimum speech duration in seconds to trigger a segment.",
+        help="Minimum seconds of speech required before a segment is processed. Filters brief sounds.",
     ),
     vad_threshold: float = typer.Option(
         0.3,
         "--vad-threshold",
-        help="VAD speech detection threshold (0.0-1.0). Higher = more aggressive filtering.",
+        help="Silero VAD confidence threshold (0.0-1.0). Higher values require clearer speech; lower values are more sensitive to quiet/distant voices.",
     ),
     save_audio: bool = typer.Option(
         True,  # noqa: FBT003
         "--save-audio/--no-save-audio",
-        help="Save audio segments as MP3 files.",
+        help="Save each speech segment as MP3. Requires `ffmpeg` to be installed.",
     ),
     audio_dir: Path | None = typer.Option(  # noqa: B008
         None,
         "--audio-dir",
-        help="Directory for MP3 files. Default: ~/.config/agent-cli/audio",
+        help="Base directory for MP3 files. Files are organized by date: `YYYY/MM/DD/HHMMSS_mmm.mp3`. Default: `~/.config/agent-cli/audio`.",
     ),
     transcription_log: Path | None = typer.Option(  # noqa: B008
         None,
         "--transcription-log",
         "-t",
-        help="JSON Lines log file path. Default: ~/.config/agent-cli/transcriptions.jsonl",
+        help="JSONL file for transcript logging (one JSON object per line with timestamp, role, raw/processed text, audio path). Default: `~/.config/agent-cli/transcriptions.jsonl`.",
     ),
     clipboard: bool = typer.Option(
         False,  # noqa: FBT003
         "--clipboard/--no-clipboard",
-        help="Copy each transcription to clipboard.",
+        help="Copy each completed transcription to clipboard (overwrites previous). Useful with `--llm` to get cleaned text.",
     ),
     # --- Provider Selection ---
     asr_provider: str = opts.ASR_PROVIDER,
@@ -368,25 +368,37 @@ def transcribe_daemon(  # noqa: PLR0912
     config_file: str | None = opts.CONFIG_FILE,
     print_args: bool = opts.PRINT_ARGS,
 ) -> None:
-    """Run a continuous transcription daemon with voice activity detection.
+    """Continuous transcription daemon using Silero VAD for speech detection.
-    This command runs indefinitely, capturing audio from your microphone,
-    detecting speech segments using Silero VAD, transcribing them, and
-    logging results with timestamps.
+    Unlike `transcribe` (single recording session), this daemon runs indefinitely
+    and automatically detects speech segments using Voice Activity Detection (VAD).
+    Each detected segment is transcribed and logged with timestamps.
-    Examples:
-        # Basic daemon
-        agent-cli transcribe-daemon
+    **How it works:**
-        # With role and custom silence threshold
-        agent-cli transcribe-daemon --role meeting --silence-threshold 1.5
+    1. Listens continuously to microphone input
+    2. Silero VAD detects when you start/stop speaking
+    3. After `--silence-threshold` seconds of silence, the segment is finalized
+    4. Segment is transcribed (and optionally cleaned by LLM with `--llm`)
+    5. Results are appended to the JSONL log file
+    6. Audio is saved as MP3 if `--save-audio` is enabled (requires `ffmpeg`)
+    **Use cases:** Meeting transcription, note-taking, voice journaling, accessibility.
-        # With LLM cleanup
-        agent-cli transcribe-daemon --llm --role notes
+    **Examples:**
+        agent-cli transcribe-daemon
+        agent-cli transcribe-daemon --role meeting --silence-threshold 1.5
+        agent-cli transcribe-daemon --llm --clipboard --role notes
+        agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --no-save-audio
+        agent-cli transcribe-daemon --asr-provider openai --llm-provider gemini --llm
-        # Custom log file and audio directory
-        agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir ~/audio
+    **Tips:**
+    - Use `--role` to tag entries (e.g., `speaker1`, `meeting`, `personal`)
+    - Adjust `--vad-threshold` if detection is too sensitive (increase) or missing speech (decrease)
+    - Use `--stop` to cleanly terminate a running daemon
+    - With `--llm`, transcripts are cleaned up (punctuation, filler words removed)
     """
     if print_args:
         print_command_line_args(locals())

agent_cli/agents/voice_edit.py CHANGED Viewed

@@ -229,15 +229,23 @@ def voice_edit(
     config_file: str | None = opts.CONFIG_FILE,
     print_args: bool = opts.PRINT_ARGS,
 ) -> None:
-    """Interact with clipboard text via a voice command using local or remote services.
-    Usage:
-    - Run in foreground: agent-cli voice-edit --input-device-index 1
-    - Run in background: agent-cli voice-edit --input-device-index 1 &
-    - Check status: agent-cli voice-edit --status
-    - Stop background process: agent-cli voice-edit --stop
-    - List output devices: agent-cli voice-edit --list-output-devices
-    - Save TTS to file: agent-cli voice-edit --tts --save-file response.wav
+    """Edit or query clipboard text using voice commands.
+    **Workflow:** Captures clipboard text → records your voice command → transcribes
+    it → sends both to an LLM → copies result back to clipboard.
+    Use this for hands-free text editing (e.g., "make this more formal") or
+    asking questions about clipboard content (e.g., "summarize this").
+    **Typical hotkey integration:** Run `voice-edit &` on keypress to start
+    recording, then send SIGINT (via `--stop`) on second keypress to process.
+    **Examples:**
+    - Basic usage: `agent-cli voice-edit`
+    - With TTS response: `agent-cli voice-edit --tts`
+    - Toggle on/off: `agent-cli voice-edit --toggle`
+    - List audio devices: `agent-cli voice-edit --list-devices`
     """
     if print_args:
         print_command_line_args(locals())

agent-cli 0.70.5__py3-none-any.whl → 0.71.0__py3-none-any.whl

agent-cli 0.70.5__py3-none-any.whl → 0.71.0__py3-none-any.whl