PyPI - agent-cli - Versions diffs - 0.2.0__tar.gz → 0.3.1__tar.gz - Mend

agent-cli 0.2.0.tar.gz → 0.3.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

{agent_cli-0.2.0 → agent_cli-0.3.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: agent-cli
-Version: 0.2.0
+Version: 0.3.1
 Summary: A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.
 Author-email: Bas Nijholt <bas@nijho.lt>
 Project-URL: Homepage, https://github.com/basnijholt/agent-cli
@@ -196,13 +196,23 @@ Transcribes whatever you say into text using Wyoming ASR (Automatic Speech Recog
 │                                                   [default: 192.168.1.143]   │
 │ --asr-server-port                        INTEGER  Wyoming ASR server port.   │
 │                                                   [default: 10300]           │
-│ --clipboard            --no-clipboard             Copy transcript to         │
-│                                                   clipboard.                 │
-│                                                   [default: clipboard]       │
+│ --model            -m                    TEXT     The Ollama model to use.   │
+│                                                   Default is devstral:24b.   │
+│                                                   [default: devstral:24b]    │
+│ --ollama-host                            TEXT     The Ollama server host.    │
+│                                                   Default is                 │
+│                                                   http://localhost:11434.    │
+│                                                   [default:                  │
+│                                                   http://localhost:11434]    │
+│ --llm                  --no-llm                   Use an LLM to process the  │
+│                                                   transcript.                │
+│                                                   [default: no-llm]          │
 │ --stop                                            Stop any running           │
 │                                                   background process.        │
 │ --status                                          Check if a background      │
 │                                                   process is running.        │
+│ --clipboard            --no-clipboard             Copy result to clipboard.  │
+│                                                   [default: clipboard]       │
 │ --log-level                              TEXT     Set logging level.         │
 │                                                   [default: WARNING]         │
 │ --log-file                               TEXT     Path to a file to write    │
@@ -268,35 +278,48 @@ The process management features make it perfect for hotkey toggles. Use `--statu
  voice-assistant --stop
 ╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --device-index             INTEGER  Index of the PyAudio input device to     │
-│                                     use.                                     │
-│                                     [default: None]                          │
-│ --device-name              TEXT     Device name keywords for partial         │
-│                                     matching. Supports comma-separated list  │
-│                                     where each term can partially match      │
-│                                     device names (case-insensitive). First   │
-│                                     matching device is selected.             │
-│                                     [default: None]                          │
-│ --list-devices                      List available audio input devices and   │
-│                                     exit.                                    │
-│ --asr-server-ip            TEXT     Wyoming ASR server IP address.           │
-│                                     [default: 192.168.1.143]                 │
-│ --asr-server-port          INTEGER  Wyoming ASR server port.                 │
-│                                     [default: 10300]                         │
-│ --model            -m      TEXT     The Ollama model to use. Default is      │
-│                                     devstral:24b.                            │
-│                                     [default: devstral:24b]                  │
-│ --ollama-host              TEXT     The Ollama server host. Default is       │
-│                                     http://localhost:11434.                  │
-│                                     [default: http://localhost:11434]        │
-│ --stop                              Stop any running background process.     │
-│ --status                            Check if a background process is         │
-│                                     running.                                 │
-│ --log-level                TEXT     Set logging level. [default: WARNING]    │
-│ --log-file                 TEXT     Path to a file to write logs to.         │
-│                                     [default: None]                          │
-│ --quiet            -q               Suppress console output from rich.       │
-│ --help                              Show this message and exit.              │
+│ --device-index                           INTEGER  Index of the PyAudio input │
+│                                                   device to use.             │
+│                                                   [default: None]            │
+│ --device-name                            TEXT     Device name keywords for   │
+│                                                   partial matching. Supports │
+│                                                   comma-separated list where │
+│                                                   each term can partially    │
+│                                                   match device names         │
+│                                                   (case-insensitive). First  │
+│                                                   matching device is         │
+│                                                   selected.                  │
+│                                                   [default: None]            │
+│ --list-devices                                    List available audio input │
+│                                                   devices and exit.          │
+│ --asr-server-ip                          TEXT     Wyoming ASR server IP      │
+│                                                   address.                   │
+│                                                   [default: 192.168.1.143]   │
+│ --asr-server-port                        INTEGER  Wyoming ASR server port.   │
+│                                                   [default: 10300]           │
+│ --model            -m                    TEXT     The Ollama model to use.   │
+│                                                   Default is devstral:24b.   │
+│                                                   [default: devstral:24b]    │
+│ --ollama-host                            TEXT     The Ollama server host.    │
+│                                                   Default is                 │
+│                                                   http://localhost:11434.    │
+│                                                   [default:                  │
+│                                                   http://localhost:11434]    │
+│ --stop                                            Stop any running           │
+│                                                   background process.        │
+│ --status                                          Check if a background      │
+│                                                   process is running.        │
+│ --clipboard            --no-clipboard             Copy result to clipboard.  │
+│                                                   [default: clipboard]       │
+│ --log-level                              TEXT     Set logging level.         │
+│                                                   [default: WARNING]         │
+│ --log-file                               TEXT     Path to a file to write    │
+│                                                   logs to.                   │
+│                                                   [default: None]            │
+│ --quiet            -q                             Suppress console output    │
+│                                                   from rich.                 │
+│ --help                                            Show this message and      │
+│                                                   exit.                      │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ```

{agent_cli-0.2.0 → agent_cli-0.3.1}/README.md RENAMED Viewed

@@ -166,13 +166,23 @@ Transcribes whatever you say into text using Wyoming ASR (Automatic Speech Recog
 │                                                   [default: 192.168.1.143]   │
 │ --asr-server-port                        INTEGER  Wyoming ASR server port.   │
 │                                                   [default: 10300]           │
-│ --clipboard            --no-clipboard             Copy transcript to         │
-│                                                   clipboard.                 │
-│                                                   [default: clipboard]       │
+│ --model            -m                    TEXT     The Ollama model to use.   │
+│                                                   Default is devstral:24b.   │
+│                                                   [default: devstral:24b]    │
+│ --ollama-host                            TEXT     The Ollama server host.    │
+│                                                   Default is                 │
+│                                                   http://localhost:11434.    │
+│                                                   [default:                  │
+│                                                   http://localhost:11434]    │
+│ --llm                  --no-llm                   Use an LLM to process the  │
+│                                                   transcript.                │
+│                                                   [default: no-llm]          │
 │ --stop                                            Stop any running           │
 │                                                   background process.        │
 │ --status                                          Check if a background      │
 │                                                   process is running.        │
+│ --clipboard            --no-clipboard             Copy result to clipboard.  │
+│                                                   [default: clipboard]       │
 │ --log-level                              TEXT     Set logging level.         │
 │                                                   [default: WARNING]         │
 │ --log-file                               TEXT     Path to a file to write    │
@@ -238,35 +248,48 @@ The process management features make it perfect for hotkey toggles. Use `--statu
  voice-assistant --stop
 ╭─ Options ────────────────────────────────────────────────────────────────────╮
-│ --device-index             INTEGER  Index of the PyAudio input device to     │
-│                                     use.                                     │
-│                                     [default: None]                          │
-│ --device-name              TEXT     Device name keywords for partial         │
-│                                     matching. Supports comma-separated list  │
-│                                     where each term can partially match      │
-│                                     device names (case-insensitive). First   │
-│                                     matching device is selected.             │
-│                                     [default: None]                          │
-│ --list-devices                      List available audio input devices and   │
-│                                     exit.                                    │
-│ --asr-server-ip            TEXT     Wyoming ASR server IP address.           │
-│                                     [default: 192.168.1.143]                 │
-│ --asr-server-port          INTEGER  Wyoming ASR server port.                 │
-│                                     [default: 10300]                         │
-│ --model            -m      TEXT     The Ollama model to use. Default is      │
-│                                     devstral:24b.                            │
-│                                     [default: devstral:24b]                  │
-│ --ollama-host              TEXT     The Ollama server host. Default is       │
-│                                     http://localhost:11434.                  │
-│                                     [default: http://localhost:11434]        │
-│ --stop                              Stop any running background process.     │
-│ --status                            Check if a background process is         │
-│                                     running.                                 │
-│ --log-level                TEXT     Set logging level. [default: WARNING]    │
-│ --log-file                 TEXT     Path to a file to write logs to.         │
-│                                     [default: None]                          │
-│ --quiet            -q               Suppress console output from rich.       │
-│ --help                              Show this message and exit.              │
+│ --device-index                           INTEGER  Index of the PyAudio input │
+│                                                   device to use.             │
+│                                                   [default: None]            │
+│ --device-name                            TEXT     Device name keywords for   │
+│                                                   partial matching. Supports │
+│                                                   comma-separated list where │
+│                                                   each term can partially    │
+│                                                   match device names         │
+│                                                   (case-insensitive). First  │
+│                                                   matching device is         │
+│                                                   selected.                  │
+│                                                   [default: None]            │
+│ --list-devices                                    List available audio input │
+│                                                   devices and exit.          │
+│ --asr-server-ip                          TEXT     Wyoming ASR server IP      │
+│                                                   address.                   │
+│                                                   [default: 192.168.1.143]   │
+│ --asr-server-port                        INTEGER  Wyoming ASR server port.   │
+│                                                   [default: 10300]           │
+│ --model            -m                    TEXT     The Ollama model to use.   │
+│                                                   Default is devstral:24b.   │
+│                                                   [default: devstral:24b]    │
+│ --ollama-host                            TEXT     The Ollama server host.    │
+│                                                   Default is                 │
+│                                                   http://localhost:11434.    │
+│                                                   [default:                  │
+│                                                   http://localhost:11434]    │
+│ --stop                                            Stop any running           │
+│                                                   background process.        │
+│ --status                                          Check if a background      │
+│                                                   process is running.        │
+│ --clipboard            --no-clipboard             Copy result to clipboard.  │
+│                                                   [default: clipboard]       │
+│ --log-level                              TEXT     Set logging level.         │
+│                                                   [default: WARNING]         │
+│ --log-file                               TEXT     Path to a file to write    │
+│                                                   logs to.                   │
+│                                                   [default: None]            │
+│ --quiet            -q                             Suppress console output    │
+│                                                   from rich.                 │
+│ --help                                            Show this message and      │
+│                                                   exit.                      │
 ╰──────────────────────────────────────────────────────────────────────────────╯
 ```

{agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli/agents/_cli_options.py RENAMED Viewed

@@ -18,6 +18,11 @@ OLLAMA_HOST: str = typer.Option(
     "--ollama-host",
     help=f"The Ollama server host. Default is {config.OLLAMA_HOST}.",
 )
+LLM: bool = typer.Option(
+    False,  # noqa: FBT003
+    "--llm/--no-llm",
+    help="Use an LLM to process the transcript.",
+)
 # --- ASR (Audio) Options ---
@@ -47,11 +52,6 @@ ASR_SERVER_PORT: int = typer.Option(
     "--asr-server-port",
     help="Wyoming ASR server port.",
 )
-CLIPBOARD: bool = typer.Option(
-    True,  # noqa: FBT003
-    "--clipboard/--no-clipboard",
-    help="Copy transcript to clipboard.",
-)
 # --- Process Management Options ---
@@ -68,6 +68,11 @@ STATUS: bool = typer.Option(
 # --- General Options ---
+CLIPBOARD: bool = typer.Option(
+    True,  # noqa: FBT003
+    "--clipboard/--no-clipboard",
+    help="Copy result to clipboard.",
+)
 LOG_LEVEL: str = typer.Option(
     "WARNING",
     "--log-level",

{agent_cli-0.2.0 → agent_cli-0.3.1}/agent_cli/agents/autocorrect.py RENAMED Viewed

@@ -27,13 +27,18 @@ import typer
 from openai import APIConnectionError
 from pydantic_ai.exceptions import ModelHTTPError
 from rich.console import Console
-from rich.panel import Panel
 from rich.status import Status
 import agent_cli.agents._cli_options as opts
 from agent_cli.cli import app, setup_logging
-from agent_cli.ollama_client import build_agent
-from agent_cli.utils import get_clipboard_text
+from agent_cli.llm import build_agent
+from agent_cli.utils import (
+    get_clipboard_text,
+    print_error_message,
+    print_input_panel,
+    print_output_panel,
+    print_status_message,
+)
 # --- Configuration ---
@@ -71,16 +76,7 @@ async def process_text(text: str, model: str, ollama_host: str) -> tuple[str, fl
 def display_original_text(original_text: str, console: Console | None) -> None:
     """Render the original text panel in verbose mode."""
-    if console is None:
-        return
-    console.print(
-        Panel(
-            original_text,
-            title="[bold cyan]📋 Original Text[/bold cyan]",
-            border_style="cyan",
-            padding=(1, 2),
-        ),
-    )
+    print_input_panel(console, original_text, title="📋 Original Text")
 def _display_result(
@@ -101,16 +97,15 @@ def _display_result(
             print(corrected_text)
     else:
         assert console is not None
-        console.print(
-            Panel(
-                corrected_text,
-                title="[bold green]✨ Corrected Text[/bold green]",
-                border_style="green",
-                padding=(1, 2),
-            ),
+        print_output_panel(
+            console,
+            corrected_text,
+            title="✨ Corrected Text",
+            subtitle=f"[dim]took {elapsed:.2f}s[/dim]",
         )
-        console.print(
-            f"✅ [bold green]Success! Corrected text has been copied to your clipboard. [bold yellow](took {elapsed:.2f} seconds)[/bold yellow][/bold green]",
+        print_status_message(
+            console,
+            "✅ Success! Corrected text has been copied to your clipboard.",
         )
@@ -166,9 +161,10 @@ def autocorrect(
     except (httpx.ConnectError, ModelHTTPError, APIConnectionError) as e:
         if quiet:
             print(f"❌ {e}")
-        elif console:
-            console.print(f"❌ {e}", style="bold red")
-            console.print(
-                f"   Please check that your Ollama server is running at [bold cyan]{ollama_host}[/bold cyan]",
+        else:
+            print_error_message(
+                console,
+                str(e),
+                f"Please check that your Ollama server is running at [bold cyan]{ollama_host}[/bold cyan]",
             )
         sys.exit(1)

agent_cli-0.3.1/agent_cli/agents/transcribe.py ADDED Viewed

@@ -0,0 +1,220 @@
+"""Wyoming ASR Client for streaming microphone audio to a transcription server."""
+from __future__ import annotations
+import asyncio
+import logging
+from contextlib import AbstractContextManager, nullcontext, suppress
+from typing import TYPE_CHECKING
+import pyperclip
+from rich.console import Console
+from rich.live import Live
+from rich.text import Text
+import agent_cli.agents._cli_options as opts
+from agent_cli import asr, process_manager
+from agent_cli.cli import app, setup_logging
+from agent_cli.llm import process_and_update_clipboard
+from agent_cli.utils import (
+    print_device_index,
+    print_input_panel,
+    print_output_panel,
+    print_status_message,
+    signal_handling_context,
+)
+if TYPE_CHECKING:
+    import pyaudio
+LOGGER = logging.getLogger()
+SYSTEM_PROMPT = """
+You are an AI transcription cleanup assistant. Your purpose is to improve and refine raw speech-to-text transcriptions by correcting errors, adding proper punctuation, and enhancing readability while preserving the original meaning and intent.
+Your tasks include:
+- Correcting obvious speech recognition errors and mishearing
+- Adding appropriate punctuation (periods, commas, question marks, etc.)
+- Fixing capitalization where needed
+- Removing filler words, false starts, and repeated words when they clearly weren't intentional
+- Improving sentence structure and flow while maintaining the speaker's voice and meaning
+- Formatting the text for better readability
+Important rules:
+- Do not change the core meaning or content of the transcription
+- Do not add information that wasn't spoken
+- Do not remove content unless it's clearly an error or filler
+- Return ONLY the cleaned-up text without any explanations or commentary
+- Do not wrap your output in markdown or code blocks
+"""
+AGENT_INSTRUCTIONS = """
+You will be given a block of raw transcribed text enclosed in <original-text> tags, and a cleanup instruction enclosed in <instruction> tags.
+Your job is to process the transcribed text according to the instruction, which will typically involve:
+- Correcting speech recognition errors
+- Adding proper punctuation and capitalization
+- Removing obvious filler words and false starts
+- Improving readability while preserving meaning
+Return ONLY the cleaned-up text with no additional formatting or commentary.
+"""
+INSTRUCTION = """
+Please clean up this transcribed text by correcting any speech recognition errors, adding appropriate punctuation and capitalization, removing obvious filler words or false starts, and improving overall readability while preserving the original meaning and intent of the speaker.
+"""
+async def async_main(
+    *,
+    device_index: int | None,
+    asr_server_ip: str,
+    asr_server_port: int,
+    clipboard: bool,
+    quiet: bool,
+    model: str,
+    ollama_host: str,
+    llm: bool,
+    console: Console | None,
+    p: pyaudio.PyAudio,
+) -> None:
+    """Async entry point, consuming parsed args."""
+    with (
+        signal_handling_context(console, LOGGER) as stop_event,
+        _maybe_live(console) as live,
+    ):
+        transcript = await asr.transcribe_audio(
+            asr_server_ip=asr_server_ip,
+            asr_server_port=asr_server_port,
+            device_index=device_index,
+            logger=LOGGER,
+            p=p,
+            stop_event=stop_event,
+            console=console,
+            live=live,
+            listening_message="Listening...",
+        )
+    if llm and model and ollama_host and transcript:
+        print_input_panel(console, transcript, title="📝 Raw Transcript")
+        await process_and_update_clipboard(
+            system_prompt=SYSTEM_PROMPT,
+            agent_instructions=AGENT_INSTRUCTIONS,
+            model=model,
+            ollama_host=ollama_host,
+            logger=LOGGER,
+            console=console,
+            original_text=transcript,
+            instruction=INSTRUCTION,
+            clipboard=clipboard,
+        )
+        return
+    # When not using LLM, show transcript in output panel for consistency
+    if transcript:
+        if quiet:
+            # Quiet mode: print result to stdout for Keyboard Maestro to capture
+            print(transcript)
+        else:
+            print_output_panel(
+                console,
+                transcript,
+                title="📝 Transcript",
+                subtitle="[dim]Copied to clipboard[/dim]" if clipboard else None,
+            )
+        if clipboard:
+            pyperclip.copy(transcript)
+            LOGGER.info("Copied transcript to clipboard.")
+        else:
+            LOGGER.info("Clipboard copy disabled.")
+    else:
+        LOGGER.info("Transcript empty.")
+        if not quiet:
+            print_status_message(console, "⚠️ No transcript captured.", style="yellow")
+def _maybe_live(console: Console | None) -> AbstractContextManager[Live | None]:
+    if console:
+        return Live(
+            Text("Transcribing...", style="blue"),
+            console=console,
+            transient=True,
+        )
+    return nullcontext()
+@app.command("transcribe")
+def transcribe(
+    *,
+    device_index: int | None = opts.DEVICE_INDEX,
+    device_name: str | None = opts.DEVICE_NAME,
+    # ASR
+    list_devices: bool = opts.LIST_DEVICES,
+    asr_server_ip: str = opts.ASR_SERVER_IP,
+    asr_server_port: int = opts.ASR_SERVER_PORT,
+    # LLM
+    model: str = opts.MODEL,
+    ollama_host: str = opts.OLLAMA_HOST,
+    llm: bool = opts.LLM,
+    # Process control
+    stop: bool = opts.STOP,
+    status: bool = opts.STATUS,
+    # General
+    clipboard: bool = opts.CLIPBOARD,
+    log_level: str = opts.LOG_LEVEL,
+    log_file: str | None = opts.LOG_FILE,
+    quiet: bool = opts.QUIET,
+) -> None:
+    """Wyoming ASR Client for streaming microphone audio to a transcription server.
+    Usage:
+    - Run in foreground: agent-cli transcribe --device-index 1
+    - Run in background: agent-cli transcribe --device-index 1 &
+    - Check status: agent-cli transcribe --status
+    - Stop background process: agent-cli transcribe --stop
+    """
+    setup_logging(log_level, log_file, quiet=quiet)
+    console = Console() if not quiet else None
+    process_name = "transcribe"
+    if stop:
+        if process_manager.kill_process(process_name):
+            print_status_message(console, "✅ Transcribe stopped.")
+        else:
+            print_status_message(console, "⚠️  No transcribe is running.", style="yellow")
+        return
+    if status:
+        if process_manager.is_process_running(process_name):
+            pid = process_manager.read_pid_file(process_name)
+            print_status_message(console, f"✅ Transcribe is running (PID: {pid}).")
+        else:
+            print_status_message(console, "⚠️  Transcribe is not running.", style="yellow")
+        return
+    console = Console() if not quiet else None
+    with asr.pyaudio_context() as p:
+        if list_devices:
+            asr.list_input_devices(p, console)
+            return
+        device_index, device_name = asr.input_device(p, device_name, device_index)
+        print_device_index(console, device_index, device_name)
+        # Use context manager for PID file management
+        with process_manager.pid_file_context(process_name), suppress(KeyboardInterrupt):
+            asyncio.run(
+                async_main(
+                    device_index=device_index,
+                    asr_server_ip=asr_server_ip,
+                    asr_server_port=asr_server_port,
+                    clipboard=clipboard,
+                    quiet=quiet,
+                    model=model,
+                    ollama_host=ollama_host,
+                    llm=llm,
+                    console=console,
+                    p=p,
+                ),
+            )

agent-cli 0.2.0__tar.gz → 0.3.1__tar.gz

agent-cli 0.2.0.tar.gz → 0.3.1.tar.gz