agent-cli 0.70.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_cli/__init__.py +5 -0
- agent_cli/__main__.py +6 -0
- agent_cli/_extras.json +14 -0
- agent_cli/_requirements/.gitkeep +0 -0
- agent_cli/_requirements/audio.txt +79 -0
- agent_cli/_requirements/faster-whisper.txt +215 -0
- agent_cli/_requirements/kokoro.txt +425 -0
- agent_cli/_requirements/llm.txt +183 -0
- agent_cli/_requirements/memory.txt +355 -0
- agent_cli/_requirements/mlx-whisper.txt +222 -0
- agent_cli/_requirements/piper.txt +176 -0
- agent_cli/_requirements/rag.txt +402 -0
- agent_cli/_requirements/server.txt +154 -0
- agent_cli/_requirements/speed.txt +77 -0
- agent_cli/_requirements/vad.txt +155 -0
- agent_cli/_requirements/wyoming.txt +71 -0
- agent_cli/_tools.py +368 -0
- agent_cli/agents/__init__.py +23 -0
- agent_cli/agents/_voice_agent_common.py +136 -0
- agent_cli/agents/assistant.py +383 -0
- agent_cli/agents/autocorrect.py +284 -0
- agent_cli/agents/chat.py +496 -0
- agent_cli/agents/memory/__init__.py +31 -0
- agent_cli/agents/memory/add.py +190 -0
- agent_cli/agents/memory/proxy.py +160 -0
- agent_cli/agents/rag_proxy.py +128 -0
- agent_cli/agents/speak.py +209 -0
- agent_cli/agents/transcribe.py +671 -0
- agent_cli/agents/transcribe_daemon.py +499 -0
- agent_cli/agents/voice_edit.py +291 -0
- agent_cli/api.py +22 -0
- agent_cli/cli.py +106 -0
- agent_cli/config.py +503 -0
- agent_cli/config_cmd.py +307 -0
- agent_cli/constants.py +27 -0
- agent_cli/core/__init__.py +1 -0
- agent_cli/core/audio.py +461 -0
- agent_cli/core/audio_format.py +299 -0
- agent_cli/core/chroma.py +88 -0
- agent_cli/core/deps.py +191 -0
- agent_cli/core/openai_proxy.py +139 -0
- agent_cli/core/process.py +195 -0
- agent_cli/core/reranker.py +120 -0
- agent_cli/core/sse.py +87 -0
- agent_cli/core/transcription_logger.py +70 -0
- agent_cli/core/utils.py +526 -0
- agent_cli/core/vad.py +175 -0
- agent_cli/core/watch.py +65 -0
- agent_cli/dev/__init__.py +14 -0
- agent_cli/dev/cli.py +1588 -0
- agent_cli/dev/coding_agents/__init__.py +19 -0
- agent_cli/dev/coding_agents/aider.py +24 -0
- agent_cli/dev/coding_agents/base.py +167 -0
- agent_cli/dev/coding_agents/claude.py +39 -0
- agent_cli/dev/coding_agents/codex.py +24 -0
- agent_cli/dev/coding_agents/continue_dev.py +15 -0
- agent_cli/dev/coding_agents/copilot.py +24 -0
- agent_cli/dev/coding_agents/cursor_agent.py +48 -0
- agent_cli/dev/coding_agents/gemini.py +28 -0
- agent_cli/dev/coding_agents/opencode.py +15 -0
- agent_cli/dev/coding_agents/registry.py +49 -0
- agent_cli/dev/editors/__init__.py +19 -0
- agent_cli/dev/editors/base.py +89 -0
- agent_cli/dev/editors/cursor.py +15 -0
- agent_cli/dev/editors/emacs.py +46 -0
- agent_cli/dev/editors/jetbrains.py +56 -0
- agent_cli/dev/editors/nano.py +31 -0
- agent_cli/dev/editors/neovim.py +33 -0
- agent_cli/dev/editors/registry.py +59 -0
- agent_cli/dev/editors/sublime.py +20 -0
- agent_cli/dev/editors/vim.py +42 -0
- agent_cli/dev/editors/vscode.py +15 -0
- agent_cli/dev/editors/zed.py +20 -0
- agent_cli/dev/project.py +568 -0
- agent_cli/dev/registry.py +52 -0
- agent_cli/dev/skill/SKILL.md +141 -0
- agent_cli/dev/skill/examples.md +571 -0
- agent_cli/dev/terminals/__init__.py +19 -0
- agent_cli/dev/terminals/apple_terminal.py +82 -0
- agent_cli/dev/terminals/base.py +56 -0
- agent_cli/dev/terminals/gnome.py +51 -0
- agent_cli/dev/terminals/iterm2.py +84 -0
- agent_cli/dev/terminals/kitty.py +77 -0
- agent_cli/dev/terminals/registry.py +48 -0
- agent_cli/dev/terminals/tmux.py +58 -0
- agent_cli/dev/terminals/warp.py +132 -0
- agent_cli/dev/terminals/zellij.py +78 -0
- agent_cli/dev/worktree.py +856 -0
- agent_cli/docs_gen.py +417 -0
- agent_cli/example-config.toml +185 -0
- agent_cli/install/__init__.py +5 -0
- agent_cli/install/common.py +89 -0
- agent_cli/install/extras.py +174 -0
- agent_cli/install/hotkeys.py +48 -0
- agent_cli/install/services.py +87 -0
- agent_cli/memory/__init__.py +7 -0
- agent_cli/memory/_files.py +250 -0
- agent_cli/memory/_filters.py +63 -0
- agent_cli/memory/_git.py +157 -0
- agent_cli/memory/_indexer.py +142 -0
- agent_cli/memory/_ingest.py +408 -0
- agent_cli/memory/_persistence.py +182 -0
- agent_cli/memory/_prompt.py +91 -0
- agent_cli/memory/_retrieval.py +294 -0
- agent_cli/memory/_store.py +169 -0
- agent_cli/memory/_streaming.py +44 -0
- agent_cli/memory/_tasks.py +48 -0
- agent_cli/memory/api.py +113 -0
- agent_cli/memory/client.py +272 -0
- agent_cli/memory/engine.py +361 -0
- agent_cli/memory/entities.py +43 -0
- agent_cli/memory/models.py +112 -0
- agent_cli/opts.py +433 -0
- agent_cli/py.typed +0 -0
- agent_cli/rag/__init__.py +3 -0
- agent_cli/rag/_indexer.py +67 -0
- agent_cli/rag/_indexing.py +226 -0
- agent_cli/rag/_prompt.py +30 -0
- agent_cli/rag/_retriever.py +156 -0
- agent_cli/rag/_store.py +48 -0
- agent_cli/rag/_utils.py +218 -0
- agent_cli/rag/api.py +175 -0
- agent_cli/rag/client.py +299 -0
- agent_cli/rag/engine.py +302 -0
- agent_cli/rag/models.py +55 -0
- agent_cli/scripts/.runtime/.gitkeep +0 -0
- agent_cli/scripts/__init__.py +1 -0
- agent_cli/scripts/check_plugin_skill_sync.py +50 -0
- agent_cli/scripts/linux-hotkeys/README.md +63 -0
- agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh +45 -0
- agent_cli/scripts/linux-hotkeys/toggle-transcription.sh +58 -0
- agent_cli/scripts/linux-hotkeys/toggle-voice-edit.sh +58 -0
- agent_cli/scripts/macos-hotkeys/README.md +45 -0
- agent_cli/scripts/macos-hotkeys/skhd-config-example +5 -0
- agent_cli/scripts/macos-hotkeys/toggle-autocorrect.sh +12 -0
- agent_cli/scripts/macos-hotkeys/toggle-transcription.sh +37 -0
- agent_cli/scripts/macos-hotkeys/toggle-voice-edit.sh +37 -0
- agent_cli/scripts/nvidia-asr-server/README.md +99 -0
- agent_cli/scripts/nvidia-asr-server/pyproject.toml +27 -0
- agent_cli/scripts/nvidia-asr-server/server.py +255 -0
- agent_cli/scripts/nvidia-asr-server/shell.nix +32 -0
- agent_cli/scripts/nvidia-asr-server/uv.lock +4654 -0
- agent_cli/scripts/run-openwakeword.sh +11 -0
- agent_cli/scripts/run-piper-windows.ps1 +30 -0
- agent_cli/scripts/run-piper.sh +24 -0
- agent_cli/scripts/run-whisper-linux.sh +40 -0
- agent_cli/scripts/run-whisper-macos.sh +6 -0
- agent_cli/scripts/run-whisper-windows.ps1 +51 -0
- agent_cli/scripts/run-whisper.sh +9 -0
- agent_cli/scripts/run_faster_whisper_server.py +136 -0
- agent_cli/scripts/setup-linux-hotkeys.sh +72 -0
- agent_cli/scripts/setup-linux.sh +108 -0
- agent_cli/scripts/setup-macos-hotkeys.sh +61 -0
- agent_cli/scripts/setup-macos.sh +76 -0
- agent_cli/scripts/setup-windows.ps1 +63 -0
- agent_cli/scripts/start-all-services-windows.ps1 +53 -0
- agent_cli/scripts/start-all-services.sh +178 -0
- agent_cli/scripts/sync_extras.py +138 -0
- agent_cli/server/__init__.py +3 -0
- agent_cli/server/cli.py +721 -0
- agent_cli/server/common.py +222 -0
- agent_cli/server/model_manager.py +288 -0
- agent_cli/server/model_registry.py +225 -0
- agent_cli/server/proxy/__init__.py +3 -0
- agent_cli/server/proxy/api.py +444 -0
- agent_cli/server/streaming.py +67 -0
- agent_cli/server/tts/__init__.py +3 -0
- agent_cli/server/tts/api.py +335 -0
- agent_cli/server/tts/backends/__init__.py +82 -0
- agent_cli/server/tts/backends/base.py +139 -0
- agent_cli/server/tts/backends/kokoro.py +403 -0
- agent_cli/server/tts/backends/piper.py +253 -0
- agent_cli/server/tts/model_manager.py +201 -0
- agent_cli/server/tts/model_registry.py +28 -0
- agent_cli/server/tts/wyoming_handler.py +249 -0
- agent_cli/server/whisper/__init__.py +3 -0
- agent_cli/server/whisper/api.py +413 -0
- agent_cli/server/whisper/backends/__init__.py +89 -0
- agent_cli/server/whisper/backends/base.py +97 -0
- agent_cli/server/whisper/backends/faster_whisper.py +225 -0
- agent_cli/server/whisper/backends/mlx.py +270 -0
- agent_cli/server/whisper/languages.py +116 -0
- agent_cli/server/whisper/model_manager.py +157 -0
- agent_cli/server/whisper/model_registry.py +28 -0
- agent_cli/server/whisper/wyoming_handler.py +203 -0
- agent_cli/services/__init__.py +343 -0
- agent_cli/services/_wyoming_utils.py +64 -0
- agent_cli/services/asr.py +506 -0
- agent_cli/services/llm.py +228 -0
- agent_cli/services/tts.py +450 -0
- agent_cli/services/wake_word.py +142 -0
- agent_cli-0.70.5.dist-info/METADATA +2118 -0
- agent_cli-0.70.5.dist-info/RECORD +196 -0
- agent_cli-0.70.5.dist-info/WHEEL +4 -0
- agent_cli-0.70.5.dist-info/entry_points.txt +4 -0
- agent_cli-0.70.5.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,2118 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: agent-cli
|
|
3
|
+
Version: 0.70.5
|
|
4
|
+
Summary: A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.
|
|
5
|
+
Project-URL: Homepage, https://github.com/basnijholt/agent-cli
|
|
6
|
+
Author-email: Bas Nijholt <bas@nijho.lt>
|
|
7
|
+
License-File: LICENSE
|
|
8
|
+
Requires-Python: <3.14,>=3.11
|
|
9
|
+
Requires-Dist: dotenv
|
|
10
|
+
Requires-Dist: httpx
|
|
11
|
+
Requires-Dist: psutil; sys_platform == 'win32'
|
|
12
|
+
Requires-Dist: pydantic
|
|
13
|
+
Requires-Dist: pyperclip
|
|
14
|
+
Requires-Dist: rich
|
|
15
|
+
Requires-Dist: setproctitle
|
|
16
|
+
Requires-Dist: typer
|
|
17
|
+
Requires-Dist: typer-slim[standard]
|
|
18
|
+
Provides-Extra: audio
|
|
19
|
+
Requires-Dist: numpy; extra == 'audio'
|
|
20
|
+
Requires-Dist: sounddevice>=0.4.6; extra == 'audio'
|
|
21
|
+
Requires-Dist: wyoming>=1.5.2; extra == 'audio'
|
|
22
|
+
Provides-Extra: dev
|
|
23
|
+
Requires-Dist: markdown-code-runner>=2.7.0; extra == 'dev'
|
|
24
|
+
Requires-Dist: markdown-gfm-admonition; extra == 'dev'
|
|
25
|
+
Requires-Dist: notebook; extra == 'dev'
|
|
26
|
+
Requires-Dist: pre-commit-uv>=4.1.4; extra == 'dev'
|
|
27
|
+
Requires-Dist: pre-commit>=3.0.0; extra == 'dev'
|
|
28
|
+
Requires-Dist: pylint>=3.0.0; extra == 'dev'
|
|
29
|
+
Requires-Dist: pytest-asyncio>=0.20.0; extra == 'dev'
|
|
30
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
|
|
31
|
+
Requires-Dist: pytest-mock; extra == 'dev'
|
|
32
|
+
Requires-Dist: pytest-timeout; extra == 'dev'
|
|
33
|
+
Requires-Dist: pytest>=7.0.0; extra == 'dev'
|
|
34
|
+
Requires-Dist: ruff; extra == 'dev'
|
|
35
|
+
Requires-Dist: versioningit; extra == 'dev'
|
|
36
|
+
Requires-Dist: zensical; extra == 'dev'
|
|
37
|
+
Provides-Extra: faster-whisper
|
|
38
|
+
Requires-Dist: fastapi[standard]; extra == 'faster-whisper'
|
|
39
|
+
Requires-Dist: faster-whisper>=1.0.0; extra == 'faster-whisper'
|
|
40
|
+
Provides-Extra: kokoro
|
|
41
|
+
Requires-Dist: fastapi[standard]; extra == 'kokoro'
|
|
42
|
+
Requires-Dist: huggingface-hub>=0.20.0; extra == 'kokoro'
|
|
43
|
+
Requires-Dist: kokoro>=0.9.0; extra == 'kokoro'
|
|
44
|
+
Requires-Dist: pip; extra == 'kokoro'
|
|
45
|
+
Requires-Dist: soundfile>=0.12.0; extra == 'kokoro'
|
|
46
|
+
Requires-Dist: transformers>=4.40.0; extra == 'kokoro'
|
|
47
|
+
Provides-Extra: llm
|
|
48
|
+
Requires-Dist: pydantic-ai-slim[duckduckgo,google,openai,vertexai]>=0.1.1; extra == 'llm'
|
|
49
|
+
Provides-Extra: memory
|
|
50
|
+
Requires-Dist: chromadb>=0.4.22; extra == 'memory'
|
|
51
|
+
Requires-Dist: fastapi[standard]; extra == 'memory'
|
|
52
|
+
Requires-Dist: huggingface-hub>=0.20.0; extra == 'memory'
|
|
53
|
+
Requires-Dist: onnxruntime>=1.17.0; extra == 'memory'
|
|
54
|
+
Requires-Dist: pyyaml>=6.0.0; extra == 'memory'
|
|
55
|
+
Requires-Dist: transformers>=4.30.0; extra == 'memory'
|
|
56
|
+
Requires-Dist: watchfiles>=0.21.0; extra == 'memory'
|
|
57
|
+
Provides-Extra: mlx-whisper
|
|
58
|
+
Requires-Dist: fastapi[standard]; (sys_platform == 'darwin' and platform_machine == 'arm64') and extra == 'mlx-whisper'
|
|
59
|
+
Requires-Dist: mlx-whisper>=0.4.0; (sys_platform == 'darwin' and platform_machine == 'arm64') and extra == 'mlx-whisper'
|
|
60
|
+
Provides-Extra: piper
|
|
61
|
+
Requires-Dist: fastapi[standard]; extra == 'piper'
|
|
62
|
+
Requires-Dist: piper-tts>=1.2.0; extra == 'piper'
|
|
63
|
+
Provides-Extra: rag
|
|
64
|
+
Requires-Dist: chromadb>=0.4.22; extra == 'rag'
|
|
65
|
+
Requires-Dist: fastapi[standard]; extra == 'rag'
|
|
66
|
+
Requires-Dist: huggingface-hub>=0.20.0; extra == 'rag'
|
|
67
|
+
Requires-Dist: markitdown[docx,pdf,pptx]>=0.1.3; extra == 'rag'
|
|
68
|
+
Requires-Dist: onnxruntime>=1.17.0; extra == 'rag'
|
|
69
|
+
Requires-Dist: transformers>=4.30.0; extra == 'rag'
|
|
70
|
+
Requires-Dist: watchfiles>=0.21.0; extra == 'rag'
|
|
71
|
+
Provides-Extra: server
|
|
72
|
+
Requires-Dist: fastapi[standard]; extra == 'server'
|
|
73
|
+
Provides-Extra: speed
|
|
74
|
+
Requires-Dist: audiostretchy>=1.3.0; extra == 'speed'
|
|
75
|
+
Provides-Extra: test
|
|
76
|
+
Requires-Dist: pytest-asyncio>=0.20.0; extra == 'test'
|
|
77
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == 'test'
|
|
78
|
+
Requires-Dist: pytest-mock; extra == 'test'
|
|
79
|
+
Requires-Dist: pytest-timeout; extra == 'test'
|
|
80
|
+
Requires-Dist: pytest>=7.0.0; extra == 'test'
|
|
81
|
+
Provides-Extra: vad
|
|
82
|
+
Requires-Dist: silero-vad>=5.1; extra == 'vad'
|
|
83
|
+
Provides-Extra: wyoming
|
|
84
|
+
Requires-Dist: wyoming>=1.5.2; extra == 'wyoming'
|
|
85
|
+
Description-Content-Type: text/markdown
|
|
86
|
+
|
|
87
|
+
# Agent CLI
|
|
88
|
+
|
|
89
|
+
<img src="https://raw.githubusercontent.com/basnijholt/agent-cli/refs/heads/main/.github/logo.svg" alt="agent-cli logo" align="right" style="width: 250px;" />
|
|
90
|
+
|
|
91
|
+
`agent-cli` is a collection of **_local-first_**, AI-powered command-line agents that run entirely on your machine.
|
|
92
|
+
It provides a suite of powerful tools for voice and text interaction, designed for privacy, offline capability, and seamless integration with system-wide hotkeys and workflows.
|
|
93
|
+
|
|
94
|
+
> [!TIP]
|
|
95
|
+
> **Short aliases available:** You can use `agent` or `ag` instead of `agent-cli` for convenience.
|
|
96
|
+
|
|
97
|
+
> [!IMPORTANT]
|
|
98
|
+
> **Local and Private by Design**
|
|
99
|
+
> All agents in this tool are designed to run **100% locally**.
|
|
100
|
+
> By default, your data, whether it's from your clipboard, microphone, or files, is never sent to any cloud API.
|
|
101
|
+
> This ensures your privacy and allows the tools to work completely offline.
|
|
102
|
+
> You can also optionally configure the agents to use OpenAI/Gemini services.
|
|
103
|
+
|
|
104
|
+
<!-- SECTION:why-i-built-this:START -->
|
|
105
|
+
## Why I built this
|
|
106
|
+
|
|
107
|
+
I got tired of typing long prompts to LLMs. Speaking is faster, so I built this tool to transcribe my voice directly to the clipboard with a hotkey.
|
|
108
|
+
|
|
109
|
+
**What it does:**
|
|
110
|
+
|
|
111
|
+
- Voice transcription to clipboard with system-wide hotkeys (Cmd+Shift+R on macOS)
|
|
112
|
+
- Autocorrect any text from your clipboard
|
|
113
|
+
- Edit clipboard content with voice commands ("make this more formal")
|
|
114
|
+
- Runs locally - no internet required, your audio stays on your machine
|
|
115
|
+
- Works with any app that can copy/paste
|
|
116
|
+
|
|
117
|
+
I use it mostly for the `transcribe` command when working with LLMs. Being able to speak naturally means I can provide more context without the typing fatigue.
|
|
118
|
+
|
|
119
|
+
Since then I have expanded the tool with many more features, all focused on local-first AI agents that integrate seamlessly with your system.
|
|
120
|
+
<!-- SECTION:why-i-built-this:END -->
|
|
121
|
+
|
|
122
|
+
[![Agent-CLI demo video](https://img.youtube.com/vi/7sBTCgttH48/0.jpg)](https://www.youtube.com/watch?v=7sBTCgttH48 "Agent-CLI: Local AI Voice & Text Tools on Your Desktop (macOS Demo)")
|
|
123
|
+
|
|
124
|
+
*See agent-cli in action: [Watch the demo](https://www.youtube.com/watch?v=7sBTCgttH48)*
|
|
125
|
+
|
|
126
|
+
## Features
|
|
127
|
+
|
|
128
|
+
- **[`autocorrect`](docs/commands/autocorrect.md)**: Correct grammar and spelling in your text using a local LLM.
|
|
129
|
+
- **[`transcribe`](docs/commands/transcribe.md)**: Transcribe audio from your microphone to clipboard.
|
|
130
|
+
- **[`speak`](docs/commands/speak.md)**: Convert text to speech using a local TTS engine.
|
|
131
|
+
- **[`voice-edit`](docs/commands/voice-edit.md)**: Edit clipboard text with voice commands.
|
|
132
|
+
- **[`assistant`](docs/commands/assistant.md)**: Wake word-based voice assistant.
|
|
133
|
+
- **[`chat`](docs/commands/chat.md)**: Conversational AI with tool-calling capabilities.
|
|
134
|
+
- **[`memory`](docs/commands/memory.md)**: Long-term memory system with `memory proxy` and `memory add`.
|
|
135
|
+
- **[`rag-proxy`](docs/commands/rag-proxy.md)**: RAG proxy server for chatting with your documents.
|
|
136
|
+
- **[`dev`](docs/commands/dev.md)**: Parallel development with git worktrees and AI coding agents.
|
|
137
|
+
- **[`server`](docs/commands/server/index.md)**: Local ASR and TTS servers with dual-protocol (Wyoming & OpenAI), TTL-based memory management, and multi-platform acceleration. Whisper uses MLX on Apple Silicon or Faster Whisper on Linux/CUDA. TTS supports Kokoro (GPU) or Piper (CPU).
|
|
138
|
+
- **[`transcribe-daemon`](docs/commands/transcribe-daemon.md)**: Continuous background transcription with VAD. Install with `uv tool install "agent-cli[vad]" -p 3.13`.
|
|
139
|
+
|
|
140
|
+
## Quick Start
|
|
141
|
+
|
|
142
|
+
### Just want the CLI tool?
|
|
143
|
+
|
|
144
|
+
If you already have AI services running (or plan to use OpenAI), simply install:
|
|
145
|
+
|
|
146
|
+
```bash
|
|
147
|
+
# Using uv (recommended)
|
|
148
|
+
uv tool install agent-cli -p 3.13
|
|
149
|
+
|
|
150
|
+
# Using pip
|
|
151
|
+
pip install agent-cli
|
|
152
|
+
```
|
|
153
|
+
|
|
154
|
+
> [!NOTE]
|
|
155
|
+
> The `-p 3.13` flag is required because some dependencies (like `onnxruntime`) don't support Python 3.14 yet.
|
|
156
|
+
> See [uv issue #8206](https://github.com/astral-sh/uv/issues/8206) for details.
|
|
157
|
+
|
|
158
|
+
Then use it:
|
|
159
|
+
```bash
|
|
160
|
+
agent-cli autocorrect "this has an eror"
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
### Want automatic setup with everything?
|
|
164
|
+
|
|
165
|
+
We offer two ways to set up agent-cli with all services:
|
|
166
|
+
|
|
167
|
+
#### Option A: Using Shell Scripts (Traditional)
|
|
168
|
+
|
|
169
|
+
```bash
|
|
170
|
+
# 1. Clone the repository
|
|
171
|
+
git clone https://github.com/basnijholt/agent-cli.git
|
|
172
|
+
cd agent-cli
|
|
173
|
+
|
|
174
|
+
# 2. Run setup (installs all services + agent-cli)
|
|
175
|
+
./scripts/setup-macos.sh # or setup-linux.sh
|
|
176
|
+
|
|
177
|
+
# 3. Start services
|
|
178
|
+
./scripts/start-all-services.sh
|
|
179
|
+
|
|
180
|
+
# 4. (Optional) Set up system-wide hotkeys
|
|
181
|
+
./scripts/setup-macos-hotkeys.sh # or setup-linux-hotkeys.sh
|
|
182
|
+
|
|
183
|
+
# 5. Use it!
|
|
184
|
+
agent-cli autocorrect "this has an eror"
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
#### Option B: Using CLI Commands (New!)
|
|
188
|
+
|
|
189
|
+
> [!NOTE]
|
|
190
|
+
> `agent-cli` uses `sounddevice` for real-time microphone/voice features.
|
|
191
|
+
> On Linux only, you need to install the system-level PortAudio library (`sudo apt install portaudio19-dev`, or your distro's equivalent) **before** you run `uv tool install agent-cli -p 3.13`.
|
|
192
|
+
> On Windows and macOS, this is handled automatically.
|
|
193
|
+
|
|
194
|
+
```bash
|
|
195
|
+
# 1. Install agent-cli
|
|
196
|
+
uv tool install agent-cli -p 3.13
|
|
197
|
+
|
|
198
|
+
# 2. Install all required services
|
|
199
|
+
agent-cli install-services
|
|
200
|
+
|
|
201
|
+
# 3. Start all services
|
|
202
|
+
agent-cli start-services
|
|
203
|
+
|
|
204
|
+
# 4. (Optional) Set up system-wide hotkeys
|
|
205
|
+
agent-cli install-hotkeys
|
|
206
|
+
|
|
207
|
+
# 5. Use it!
|
|
208
|
+
agent-cli autocorrect "this has an eror"
|
|
209
|
+
```
|
|
210
|
+
|
|
211
|
+
The setup scripts automatically install:
|
|
212
|
+
- ✅ Package managers (Homebrew/uv) if needed
|
|
213
|
+
- ✅ All AI services (Ollama, Whisper, TTS, etc.)
|
|
214
|
+
- ✅ The `agent-cli` tool
|
|
215
|
+
- ✅ System dependencies
|
|
216
|
+
- ✅ Hotkey managers (if using hotkey scripts)
|
|
217
|
+
|
|
218
|
+
<details><summary><b><u>[ToC]</u></b> 📚</summary>
|
|
219
|
+
|
|
220
|
+
<!-- START doctoc generated TOC please keep comment here to allow auto update -->
|
|
221
|
+
<!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
|
|
222
|
+
|
|
223
|
+
- [Installation](#installation)
|
|
224
|
+
- [Option 1: CLI Tool Only](#option-1-cli-tool-only)
|
|
225
|
+
- [Option 2: Automated Full Setup](#option-2-automated-full-setup)
|
|
226
|
+
- [Step 1: Clone the Repository](#step-1-clone-the-repository)
|
|
227
|
+
- [Step 2: Run the Setup Script](#step-2-run-the-setup-script)
|
|
228
|
+
- [Step 3: Start All Services](#step-3-start-all-services)
|
|
229
|
+
- [Step 4: Test Your Installation](#step-4-test-your-installation)
|
|
230
|
+
- [System Integration](#system-integration)
|
|
231
|
+
- [macOS Hotkeys](#macos-hotkeys)
|
|
232
|
+
- [Linux Hotkeys](#linux-hotkeys)
|
|
233
|
+
- [Claude Code Plugin](#claude-code-plugin)
|
|
234
|
+
- [Prerequisites](#prerequisites)
|
|
235
|
+
- [What You Need to Install Manually](#what-you-need-to-install-manually)
|
|
236
|
+
- [What the Setup Scripts Install for You](#what-the-setup-scripts-install-for-you)
|
|
237
|
+
- [Core Requirements (Auto-installed)](#core-requirements-auto-installed)
|
|
238
|
+
- [AI Services (Auto-installed and configured)](#ai-services-auto-installed-and-configured)
|
|
239
|
+
- [Alternative Cloud Services (Optional)](#alternative-cloud-services-optional)
|
|
240
|
+
- [Alternative Local LLM Servers](#alternative-local-llm-servers)
|
|
241
|
+
- [Usage](#usage)
|
|
242
|
+
- [Installation Commands](#installation-commands)
|
|
243
|
+
- [Installing Optional Extras](#installing-optional-extras)
|
|
244
|
+
- [Configuration](#configuration)
|
|
245
|
+
- [Managing Configuration](#managing-configuration)
|
|
246
|
+
- [Provider Defaults](#provider-defaults)
|
|
247
|
+
- [`autocorrect`](#autocorrect)
|
|
248
|
+
- [`transcribe`](#transcribe)
|
|
249
|
+
- [`transcribe-daemon`](#transcribe-daemon)
|
|
250
|
+
- [`speak`](#speak)
|
|
251
|
+
- [`voice-edit`](#voice-edit)
|
|
252
|
+
- [`assistant`](#assistant)
|
|
253
|
+
- [`chat`](#chat)
|
|
254
|
+
- [`rag-proxy`](#rag-proxy)
|
|
255
|
+
- [`memory`](#memory)
|
|
256
|
+
- [`memory proxy`](#memory-proxy)
|
|
257
|
+
- [`memory add`](#memory-add)
|
|
258
|
+
- [Development](#development)
|
|
259
|
+
- [Running Tests](#running-tests)
|
|
260
|
+
- [Pre-commit Hooks](#pre-commit-hooks)
|
|
261
|
+
- [Contributing](#contributing)
|
|
262
|
+
- [License](#license)
|
|
263
|
+
|
|
264
|
+
<!-- END doctoc generated TOC please keep comment here to allow auto update -->
|
|
265
|
+
|
|
266
|
+
</details>
|
|
267
|
+
|
|
268
|
+
|
|
269
|
+
## Installation
|
|
270
|
+
|
|
271
|
+
### Option 1: CLI Tool Only
|
|
272
|
+
|
|
273
|
+
If you already have AI services set up or plan to use cloud services (OpenAI/Gemini):
|
|
274
|
+
|
|
275
|
+
```bash
|
|
276
|
+
# Using uv (recommended)
|
|
277
|
+
uv tool install agent-cli -p 3.13
|
|
278
|
+
|
|
279
|
+
# Using pip
|
|
280
|
+
pip install agent-cli
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
### Option 2: Automated Full Setup
|
|
284
|
+
|
|
285
|
+
For a complete local setup with all AI services:
|
|
286
|
+
|
|
287
|
+
#### Step 1: Clone the Repository
|
|
288
|
+
|
|
289
|
+
```bash
|
|
290
|
+
git clone https://github.com/basnijholt/agent-cli.git
|
|
291
|
+
cd agent-cli
|
|
292
|
+
```
|
|
293
|
+
|
|
294
|
+
#### Step 2: Run the Setup Script
|
|
295
|
+
|
|
296
|
+
| Platform | Setup Command | What It Does | Detailed Guide |
|
|
297
|
+
|----------|---------------|--------------|----------------|
|
|
298
|
+
| **🍎 macOS** | `./scripts/setup-macos.sh` | Installs Homebrew (if needed), uv, Ollama, all services, and agent-cli | [macOS Guide](docs/installation/macos.md) |
|
|
299
|
+
| **🐧 Linux** | `./scripts/setup-linux.sh` | Installs uv, Ollama, all services, and agent-cli | [Linux Guide](docs/installation/linux.md) |
|
|
300
|
+
| **❄️ NixOS** | See guide → | Special instructions for NixOS | [NixOS Guide](docs/installation/nixos.md) |
|
|
301
|
+
| **🐳 Docker** | See guide → | Container-based setup (slower) | [Docker Guide](docs/installation/docker.md) |
|
|
302
|
+
|
|
303
|
+
#### Step 3: Start All Services
|
|
304
|
+
|
|
305
|
+
```bash
|
|
306
|
+
./scripts/start-all-services.sh
|
|
307
|
+
```
|
|
308
|
+
|
|
309
|
+
This launches all AI services in a single terminal session using Zellij.
|
|
310
|
+
|
|
311
|
+
#### Step 4: Test Your Installation
|
|
312
|
+
|
|
313
|
+
```bash
|
|
314
|
+
agent-cli autocorrect "this has an eror"
|
|
315
|
+
# Output: this has an error
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
> [!NOTE]
|
|
319
|
+
> The setup scripts handle everything automatically. For platform-specific details or troubleshooting, see the [installation guides](docs/installation/).
|
|
320
|
+
|
|
321
|
+
<details><summary><b>Development Installation</b></summary>
|
|
322
|
+
|
|
323
|
+
For contributing or development:
|
|
324
|
+
|
|
325
|
+
```bash
|
|
326
|
+
git clone https://github.com/basnijholt/agent-cli.git
|
|
327
|
+
cd agent-cli
|
|
328
|
+
uv sync
|
|
329
|
+
source .venv/bin/activate # On Windows: .venv\Scripts\activate
|
|
330
|
+
```
|
|
331
|
+
|
|
332
|
+
</details>
|
|
333
|
+
|
|
334
|
+
## System Integration
|
|
335
|
+
|
|
336
|
+
Want system-wide hotkeys? Run `agent-cli install-hotkeys`, or clone the repository and run the setup scripts directly:
|
|
337
|
+
|
|
338
|
+
```bash
|
|
339
|
+
# If you haven't already cloned it
|
|
340
|
+
git clone https://github.com/basnijholt/agent-cli.git
|
|
341
|
+
cd agent-cli
|
|
342
|
+
```
|
|
343
|
+
|
|
344
|
+
### macOS Hotkeys
|
|
345
|
+
|
|
346
|
+
```bash
|
|
347
|
+
./scripts/setup-macos-hotkeys.sh
|
|
348
|
+
```
|
|
349
|
+
|
|
350
|
+
This script automatically:
|
|
351
|
+
- ✅ Installs Homebrew if not present
|
|
352
|
+
- ✅ Installs skhd (hotkey daemon) and terminal-notifier
|
|
353
|
+
- ✅ Configures these system-wide hotkeys:
|
|
354
|
+
- **`Cmd+Shift+R`** - Toggle voice transcription
|
|
355
|
+
- **`Cmd+Shift+A`** - Autocorrect clipboard text
|
|
356
|
+
- **`Cmd+Shift+V`** - Voice edit clipboard text
|
|
357
|
+
|
|
358
|
+
> [!NOTE]
|
|
359
|
+
> After setup, you may need to grant Accessibility permissions to skhd in System Settings → Privacy & Security → Accessibility.
|
|
360
|
+
|
|
361
|
+
> [!TIP]
|
|
362
|
+
> To keep the “Listening…” indicator visible for the whole recording, open System Settings → Notifications → *terminal-notifier* and set the Alert style to **Persistent** (or choose **Alerts** on older macOS versions).
|
|
363
|
+
> Also enable "Allow notification when mirroring or sharing the display".
|
|
364
|
+
> The hotkey scripts keep only the recording notification pinned; status and result toasts auto-dismiss.
|
|
365
|
+
|
|
366
|
+
### Linux Hotkeys
|
|
367
|
+
|
|
368
|
+
```bash
|
|
369
|
+
./scripts/setup-linux-hotkeys.sh
|
|
370
|
+
```
|
|
371
|
+
|
|
372
|
+
This script automatically:
|
|
373
|
+
- ✅ Installs notification tools if needed
|
|
374
|
+
- ✅ Provides configuration for your desktop environment
|
|
375
|
+
- ✅ Sets up these hotkeys:
|
|
376
|
+
- **`Super+Shift+R`** - Toggle voice transcription
|
|
377
|
+
- **`Super+Shift+A`** - Autocorrect clipboard text
|
|
378
|
+
- **`Super+Shift+V`** - Voice edit clipboard text
|
|
379
|
+
|
|
380
|
+
The script supports Hyprland, GNOME, KDE, Sway, i3, XFCE, and provides instructions for manual configuration on other environments.
|
|
381
|
+
|
|
382
|
+
### Claude Code Plugin
|
|
383
|
+
|
|
384
|
+
The [`dev`](docs/commands/dev.md) command is also available as a **Claude Code plugin**, enabling Claude to automatically spawn parallel AI agents in isolated git worktrees when you ask it to work on multiple features.
|
|
385
|
+
|
|
386
|
+
```bash
|
|
387
|
+
# Option 1: Install skill directly in your project (recommended)
|
|
388
|
+
agent-cli dev install-skill
|
|
389
|
+
|
|
390
|
+
# Option 2: Install via Claude Code plugin marketplace
|
|
391
|
+
claude plugin marketplace add basnijholt/agent-cli
|
|
392
|
+
claude plugin install agent-cli@agent-cli-dev
|
|
393
|
+
```
|
|
394
|
+
|
|
395
|
+
Once installed, Claude Code can automatically use this skill when you ask to:
|
|
396
|
+
- "Work on these 3 features in parallel"
|
|
397
|
+
- "Spawn agents for auth and payments"
|
|
398
|
+
- "Delegate this refactoring to a separate agent"
|
|
399
|
+
|
|
400
|
+
See the [plugin documentation](.claude-plugin/README.md) for more details.
|
|
401
|
+
|
|
402
|
+
## Prerequisites
|
|
403
|
+
|
|
404
|
+
### What You Need to Install Manually
|
|
405
|
+
|
|
406
|
+
The only thing you need to have installed is **Git** to clone this repository. Everything else is handled automatically!
|
|
407
|
+
|
|
408
|
+
### What the Setup Scripts Install for You
|
|
409
|
+
|
|
410
|
+
Our installation scripts automatically handle all dependencies:
|
|
411
|
+
|
|
412
|
+
#### Core Requirements (Auto-installed)
|
|
413
|
+
- 🍺 **Homebrew** (macOS) - Installed if not present
|
|
414
|
+
- 🐍 **uv** - Python package manager - Installed automatically
|
|
415
|
+
- 📋 **Clipboard Tools** - Pre-installed on macOS, handled on Linux
|
|
416
|
+
|
|
417
|
+
#### AI Services (Auto-installed and configured)
|
|
418
|
+
|
|
419
|
+
| Service | Purpose | Auto-installed? |
|
|
420
|
+
|---------|---------|-----------------|
|
|
421
|
+
| **[Ollama](https://ollama.ai/)** | Local LLM for text processing | ✅ Yes, with default model |
|
|
422
|
+
| **[Wyoming Faster Whisper](https://github.com/rhasspy/wyoming-faster-whisper)** | Speech-to-text | ✅ Yes, via `uvx` |
|
|
423
|
+
| **[`agent-cli server whisper`](docs/commands/server/whisper.md)** | Speech-to-text (alternative) | ✅ Built-in, `pip install "agent-cli[faster-whisper]"` (or `"agent-cli[mlx-whisper]"` on Apple Silicon) |
|
|
424
|
+
| **[Wyoming Piper](https://github.com/rhasspy/wyoming-piper)** | Text-to-speech | ✅ Yes, via `uvx` |
|
|
425
|
+
| **[Kokoro-FastAPI](https://github.com/remsky/Kokoro-FastAPI)** | Premium TTS (optional) | ⚙️ Can be added later |
|
|
426
|
+
| **[Wyoming openWakeWord](https://github.com/rhasspy/wyoming-openwakeword)** | Wake word detection | ✅ Yes, for `assistant` |
|
|
427
|
+
|
|
428
|
+
> **Why `agent-cli server whisper`?** The built-in Whisper server offers an OpenAI-compatible API (drop-in replacement), Wyoming protocol for Home Assistant, TTL-based VRAM management (auto-unloads idle models), and auto-selects the optimal backend ([MLX](https://github.com/ml-explore/mlx-examples/tree/main/whisper) on Apple Silicon, [faster-whisper](https://github.com/SYSTRAN/faster-whisper) on Linux/CUDA). Docker images available at `ghcr.io/basnijholt/agent-cli-whisper`.
|
|
429
|
+
|
|
430
|
+
#### Alternative Cloud Services (Optional)
|
|
431
|
+
|
|
432
|
+
If you prefer cloud services over local ones:
|
|
433
|
+
|
|
434
|
+
| Service | Purpose | Setup Required |
|
|
435
|
+
|---------|---------|----------------|
|
|
436
|
+
| **OpenAI** | LLM, Speech-to-text, TTS | API key in config |
|
|
437
|
+
| **Gemini** | LLM alternative | API key in config |
|
|
438
|
+
|
|
439
|
+
#### Alternative Local LLM Servers
|
|
440
|
+
|
|
441
|
+
You can also use other OpenAI-compatible local servers:
|
|
442
|
+
|
|
443
|
+
| Server | Purpose | Setup Required |
|
|
444
|
+
|---------|---------|----------------|
|
|
445
|
+
| **llama.cpp** | Local LLM inference | Use `--openai-base-url http://localhost:8080/v1` |
|
|
446
|
+
| **vLLM** | High-performance LLM serving | Use `--openai-base-url` with server endpoint |
|
|
447
|
+
| **Ollama** | Default local LLM | Already configured as default |
|
|
448
|
+
|
|
449
|
+
## Usage
|
|
450
|
+
|
|
451
|
+
This package provides multiple command-line tools, each designed for a specific purpose.
|
|
452
|
+
|
|
453
|
+
### Installation Commands
|
|
454
|
+
|
|
455
|
+
These commands help you set up `agent-cli` and its required services:
|
|
456
|
+
|
|
457
|
+
- **`install-services`**: Install all required AI services (Ollama, Whisper, Piper, OpenWakeWord)
|
|
458
|
+
- **`install-hotkeys`**: Set up system-wide hotkeys for quick access to agent-cli features
|
|
459
|
+
- **`install-extras`**: Install optional Python dependencies (rag, memory, vad, etc.) with pinned versions
|
|
460
|
+
- **`start-services`**: Start all services in a Zellij terminal session
|
|
461
|
+
|
|
462
|
+
All necessary scripts are bundled with the package, so you can run these commands immediately after installing `agent-cli`.
|
|
463
|
+
|
|
464
|
+
#### Installing Optional Extras
|
|
465
|
+
|
|
466
|
+
Some features require additional Python dependencies. By default, **agent-cli will auto-install missing extras** when you run a command that needs them. To disable this, set `AGENT_CLI_NO_AUTO_INSTALL=1` or add the following to your config file:
|
|
467
|
+
|
|
468
|
+
```toml
|
|
469
|
+
[settings]
|
|
470
|
+
auto_install_extras = false
|
|
471
|
+
```
|
|
472
|
+
|
|
473
|
+
You can also manually install extras with `install-extras`:
|
|
474
|
+
|
|
475
|
+
```bash
|
|
476
|
+
# List available extras
|
|
477
|
+
agent-cli install-extras --list
|
|
478
|
+
|
|
479
|
+
# Install specific extras
|
|
480
|
+
agent-cli install-extras rag memory vad
|
|
481
|
+
```
|
|
482
|
+
|
|
483
|
+
<details>
|
|
484
|
+
<summary>See the output of <code>agent-cli install-extras --help</code></summary>
|
|
485
|
+
|
|
486
|
+
<!-- CODE:BASH:START -->
|
|
487
|
+
<!-- echo '```yaml' -->
|
|
488
|
+
<!-- export NO_COLOR=1 -->
|
|
489
|
+
<!-- export TERM=dumb -->
|
|
490
|
+
<!-- export COLUMNS=90 -->
|
|
491
|
+
<!-- export TERMINAL_WIDTH=90 -->
|
|
492
|
+
<!-- agent-cli install-extras --help -->
|
|
493
|
+
<!-- echo '```' -->
|
|
494
|
+
<!-- CODE:END -->
|
|
495
|
+
<!-- OUTPUT:START -->
|
|
496
|
+
<!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
|
|
497
|
+
```yaml
|
|
498
|
+
|
|
499
|
+
Usage: agent-cli install-extras [OPTIONS] [EXTRAS]...
|
|
500
|
+
|
|
501
|
+
Install optional extras (rag, memory, vad, etc.) with pinned versions.
|
|
502
|
+
|
|
503
|
+
Examples:
|
|
504
|
+
|
|
505
|
+
• agent-cli install-extras rag # Install RAG dependencies
|
|
506
|
+
• agent-cli install-extras memory vad # Install multiple extras
|
|
507
|
+
• agent-cli install-extras --list # Show available extras
|
|
508
|
+
• agent-cli install-extras --all # Install all extras
|
|
509
|
+
|
|
510
|
+
╭─ Arguments ────────────────────────────────────────────────────────────────────────────╮
|
|
511
|
+
│ extras [EXTRAS]... Extras to install │
|
|
512
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
513
|
+
╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
|
|
514
|
+
│ --list -l List available extras │
|
|
515
|
+
│ --all -a Install all available extras │
|
|
516
|
+
│ --help -h Show this message and exit. │
|
|
517
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
518
|
+
|
|
519
|
+
```
|
|
520
|
+
|
|
521
|
+
<!-- OUTPUT:END -->
|
|
522
|
+
|
|
523
|
+
</details>
|
|
524
|
+
|
|
525
|
+
### Configuration
|
|
526
|
+
|
|
527
|
+
All `agent-cli` commands can be configured using a TOML file. The configuration file is searched for in the following locations, in order:
|
|
528
|
+
|
|
529
|
+
1. `./agent-cli-config.toml` (in the current directory)
|
|
530
|
+
2. `~/.config/agent-cli/config.toml`
|
|
531
|
+
|
|
532
|
+
You can also specify a path to a configuration file using the `--config` option, e.g., `agent-cli transcribe --config /path/to/your/config.toml`.
|
|
533
|
+
|
|
534
|
+
Command-line options always take precedence over settings in the configuration file.
|
|
535
|
+
|
|
536
|
+
#### Managing Configuration
|
|
537
|
+
|
|
538
|
+
Use the `config` command to manage your configuration files:
|
|
539
|
+
|
|
540
|
+
```bash
|
|
541
|
+
# Create a new config file with all options (commented out as a template)
|
|
542
|
+
agent-cli config init
|
|
543
|
+
|
|
544
|
+
# View your current config (syntax highlighted)
|
|
545
|
+
agent-cli config show
|
|
546
|
+
|
|
547
|
+
# View config as raw text (for copy-paste)
|
|
548
|
+
agent-cli config show --raw
|
|
549
|
+
|
|
550
|
+
# Open config in your editor ($EDITOR, or nano/vim)
|
|
551
|
+
agent-cli config edit
|
|
552
|
+
```
|
|
553
|
+
|
|
554
|
+
<details>
|
|
555
|
+
<summary>See the output of <code>agent-cli config --help</code></summary>
|
|
556
|
+
|
|
557
|
+
<!-- CODE:BASH:START -->
|
|
558
|
+
<!-- echo '```yaml' -->
|
|
559
|
+
<!-- export NO_COLOR=1 -->
|
|
560
|
+
<!-- export TERM=dumb -->
|
|
561
|
+
<!-- export COLUMNS=90 -->
|
|
562
|
+
<!-- export TERMINAL_WIDTH=90 -->
|
|
563
|
+
<!-- agent-cli config --help -->
|
|
564
|
+
<!-- echo '```' -->
|
|
565
|
+
<!-- CODE:END -->
|
|
566
|
+
<!-- OUTPUT:START -->
|
|
567
|
+
<!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
|
|
568
|
+
```yaml
|
|
569
|
+
|
|
570
|
+
Usage: agent-cli config [OPTIONS] COMMAND [ARGS]...
|
|
571
|
+
|
|
572
|
+
Manage agent-cli configuration files.
|
|
573
|
+
|
|
574
|
+
╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
|
|
575
|
+
│ --help -h Show this message and exit. │
|
|
576
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
577
|
+
╭─ Commands ─────────────────────────────────────────────────────────────────────────────╮
|
|
578
|
+
│ init Create a new config file with all options commented out. │
|
|
579
|
+
│ edit Open the config file in your default editor. │
|
|
580
|
+
│ show Display the config file location and contents. │
|
|
581
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
582
|
+
|
|
583
|
+
```
|
|
584
|
+
|
|
585
|
+
<!-- OUTPUT:END -->
|
|
586
|
+
|
|
587
|
+
</details>
|
|
588
|
+
|
|
589
|
+
An example configuration file is also provided in [`example.agent-cli-config.toml`](./example.agent-cli-config.toml).
|
|
590
|
+
|
|
591
|
+
#### Provider Defaults
|
|
592
|
+
|
|
593
|
+
You can choose local or cloud services per capability by setting provider keys in
|
|
594
|
+
the `[defaults]` section of your configuration file.
|
|
595
|
+
|
|
596
|
+
```toml
|
|
597
|
+
[defaults]
|
|
598
|
+
# llm_provider = "ollama" # 'ollama', 'openai', or 'gemini'
|
|
599
|
+
# asr_provider = "wyoming" # 'wyoming', 'openai', or 'gemini'
|
|
600
|
+
# tts_provider = "wyoming" # 'wyoming', 'openai', or 'kokoro'
|
|
601
|
+
# openai_api_key = "sk-..."
|
|
602
|
+
# gemini_api_key = "..."
|
|
603
|
+
```
|
|
604
|
+
|
|
605
|
+
### `autocorrect`
|
|
606
|
+
|
|
607
|
+
**Purpose:** Quickly fix spelling and grammar in any text you've copied.
|
|
608
|
+
|
|
609
|
+
**Workflow:** This is a simple, one-shot command.
|
|
610
|
+
|
|
611
|
+
1. It reads text from your system clipboard (or from a direct argument).
|
|
612
|
+
2. It sends the text to an LLM (a local Ollama model by default) with a prompt to perform only technical corrections.
|
|
613
|
+
3. The corrected text is copied back to your clipboard, replacing the original.
|
|
614
|
+
|
|
615
|
+
**How to Use It:** This tool is ideal for integrating with a system-wide hotkey.
|
|
616
|
+
|
|
617
|
+
- **From Clipboard**: `agent-cli autocorrect`
|
|
618
|
+
- **From Argument**: `agent-cli autocorrect "this text has an eror"`
|
|
619
|
+
|
|
620
|
+
<details>
|
|
621
|
+
<summary>See the output of <code>agent-cli autocorrect --help</code></summary>
|
|
622
|
+
|
|
623
|
+
<!-- CODE:BASH:START -->
|
|
624
|
+
<!-- echo '```yaml' -->
|
|
625
|
+
<!-- export NO_COLOR=1 -->
|
|
626
|
+
<!-- export TERM=dumb -->
|
|
627
|
+
<!-- export COLUMNS=90 -->
|
|
628
|
+
<!-- export TERMINAL_WIDTH=90 -->
|
|
629
|
+
<!-- agent-cli autocorrect --help -->
|
|
630
|
+
<!-- echo '```' -->
|
|
631
|
+
<!-- CODE:END -->
|
|
632
|
+
<!-- OUTPUT:START -->
|
|
633
|
+
<!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
|
|
634
|
+
```yaml
|
|
635
|
+
|
|
636
|
+
Usage: agent-cli autocorrect [OPTIONS] [TEXT]
|
|
637
|
+
|
|
638
|
+
Correct text from clipboard using a local or remote LLM.
|
|
639
|
+
|
|
640
|
+
╭─ General Options ──────────────────────────────────────────────────────────────────────╮
|
|
641
|
+
│ text [TEXT] The text to correct. If not provided, reads from clipboard. │
|
|
642
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
643
|
+
╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
|
|
644
|
+
│ --help -h Show this message and exit. │
|
|
645
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
646
|
+
╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
|
|
647
|
+
│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
|
|
648
|
+
│ [env var: LLM_PROVIDER] │
|
|
649
|
+
│ [default: ollama] │
|
|
650
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
651
|
+
╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
|
|
652
|
+
│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
|
|
653
|
+
│ [env var: LLM_OLLAMA_MODEL] │
|
|
654
|
+
│ [default: gemma3:4b] │
|
|
655
|
+
│ --llm-ollama-host TEXT The Ollama server host. Default is │
|
|
656
|
+
│ http://localhost:11434. │
|
|
657
|
+
│ [env var: LLM_OLLAMA_HOST] │
|
|
658
|
+
│ [default: http://localhost:11434] │
|
|
659
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
660
|
+
╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
|
|
661
|
+
│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
|
|
662
|
+
│ [env var: LLM_OPENAI_MODEL] │
|
|
663
|
+
│ [default: gpt-5-mini] │
|
|
664
|
+
│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
|
|
665
|
+
│ OPENAI_API_KEY environment variable. │
|
|
666
|
+
│ [env var: OPENAI_API_KEY] │
|
|
667
|
+
│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
|
|
668
|
+
│ llama-server: http://localhost:8080/v1). │
|
|
669
|
+
│ [env var: OPENAI_BASE_URL] │
|
|
670
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
671
|
+
╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
|
|
672
|
+
│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
|
|
673
|
+
│ [env var: LLM_GEMINI_MODEL] │
|
|
674
|
+
│ [default: gemini-3-flash-preview] │
|
|
675
|
+
│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
|
|
676
|
+
│ GEMINI_API_KEY environment variable. │
|
|
677
|
+
│ [env var: GEMINI_API_KEY] │
|
|
678
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
679
|
+
╭─ General Options ──────────────────────────────────────────────────────────────────────╮
|
|
680
|
+
│ --log-level [debug|info|warning|error] Set logging level. │
|
|
681
|
+
│ [env var: LOG_LEVEL] │
|
|
682
|
+
│ [default: info] │
|
|
683
|
+
│ --log-file TEXT Path to a file to write logs to. │
|
|
684
|
+
│ --quiet -q Suppress console output from rich. │
|
|
685
|
+
│ --json Output result as JSON for │
|
|
686
|
+
│ automation. Implies --quiet and │
|
|
687
|
+
│ --no-clipboard. │
|
|
688
|
+
│ --config TEXT Path to a TOML configuration file. │
|
|
689
|
+
│ --print-args Print the command line arguments, │
|
|
690
|
+
│ including variables taken from the │
|
|
691
|
+
│ configuration file. │
|
|
692
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
693
|
+
|
|
694
|
+
```
|
|
695
|
+
|
|
696
|
+
<!-- OUTPUT:END -->
|
|
697
|
+
|
|
698
|
+
</details>
|
|
699
|
+
|
|
700
|
+
### `transcribe`
|
|
701
|
+
|
|
702
|
+
**Purpose:** A simple tool to turn your speech into text.
|
|
703
|
+
|
|
704
|
+
**Workflow:** This agent listens to your microphone and converts your speech to text in real time.
|
|
705
|
+
|
|
706
|
+
1. Run the command. It will start listening immediately.
|
|
707
|
+
2. Speak into your microphone.
|
|
708
|
+
3. Press `Ctrl+C` to stop recording.
|
|
709
|
+
4. The transcribed text is copied to your clipboard.
|
|
710
|
+
5. Optionally, use the `--llm` flag to have an LLM (a local Ollama model by default) clean up the raw transcript (fixing punctuation, etc.).
|
|
711
|
+
|
|
712
|
+
**How to Use It:**
|
|
713
|
+
|
|
714
|
+
- **Simple Transcription**: `agent-cli transcribe --input-device-index 1`
|
|
715
|
+
- **With LLM Cleanup**: `agent-cli transcribe --input-device-index 1 --llm`
|
|
716
|
+
|
|
717
|
+
<details>
|
|
718
|
+
<summary>See the output of <code>agent-cli transcribe --help</code></summary>
|
|
719
|
+
|
|
720
|
+
<!-- CODE:BASH:START -->
|
|
721
|
+
<!-- echo '```yaml' -->
|
|
722
|
+
<!-- export NO_COLOR=1 -->
|
|
723
|
+
<!-- export TERM=dumb -->
|
|
724
|
+
<!-- export COLUMNS=90 -->
|
|
725
|
+
<!-- export TERMINAL_WIDTH=90 -->
|
|
726
|
+
<!-- agent-cli transcribe --help -->
|
|
727
|
+
<!-- echo '```' -->
|
|
728
|
+
<!-- CODE:END -->
|
|
729
|
+
<!-- OUTPUT:START -->
|
|
730
|
+
<!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
|
|
731
|
+
```yaml
|
|
732
|
+
|
|
733
|
+
Usage: agent-cli transcribe [OPTIONS]
|
|
734
|
+
|
|
735
|
+
Wyoming ASR Client for streaming microphone audio to a transcription server.
|
|
736
|
+
|
|
737
|
+
╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
|
|
738
|
+
│ --help -h Show this message and exit. │
|
|
739
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
740
|
+
╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮
|
|
741
|
+
│ --extra-instructions TEXT Additional instructions for the LLM to │
|
|
742
|
+
│ process the transcription. │
|
|
743
|
+
│ --llm --no-llm Use an LLM to process the transcript. │
|
|
744
|
+
│ [default: no-llm] │
|
|
745
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
746
|
+
╭─ Audio Recovery ───────────────────────────────────────────────────────────────────────╮
|
|
747
|
+
│ --from-file PATH Transcribe audio from a file │
|
|
748
|
+
│ (supports wav, mp3, m4a, ogg, │
|
|
749
|
+
│ flac, aac, webm). Requires ffmpeg │
|
|
750
|
+
│ for non-WAV formats with Wyoming │
|
|
751
|
+
│ provider. │
|
|
752
|
+
│ --last-recording INTEGER Transcribe a saved recording. Use │
|
|
753
|
+
│ 1 for most recent, 2 for │
|
|
754
|
+
│ second-to-last, etc. Use 0 to │
|
|
755
|
+
│ disable (default). │
|
|
756
|
+
│ [default: 0] │
|
|
757
|
+
│ --save-recording --no-save-recording Save the audio recording to disk │
|
|
758
|
+
│ for recovery. │
|
|
759
|
+
│ [default: save-recording] │
|
|
760
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
761
|
+
╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
|
|
762
|
+
│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │
|
|
763
|
+
│ [env var: ASR_PROVIDER] │
|
|
764
|
+
│ [default: wyoming] │
|
|
765
|
+
│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
|
|
766
|
+
│ [env var: LLM_PROVIDER] │
|
|
767
|
+
│ [default: ollama] │
|
|
768
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
769
|
+
╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
|
|
770
|
+
│ --input-device-index INTEGER Index of the audio input device to use. │
|
|
771
|
+
│ --input-device-name TEXT Device name keywords for partial matching. │
|
|
772
|
+
│ --list-devices List available audio input and output devices and │
|
|
773
|
+
│ exit. │
|
|
774
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
775
|
+
╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮
|
|
776
|
+
│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
|
|
777
|
+
│ [env var: ASR_WYOMING_IP] │
|
|
778
|
+
│ [default: localhost] │
|
|
779
|
+
│ --asr-wyoming-port INTEGER Wyoming ASR server port. │
|
|
780
|
+
│ [env var: ASR_WYOMING_PORT] │
|
|
781
|
+
│ [default: 10300] │
|
|
782
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
783
|
+
╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮
|
|
784
|
+
│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │
|
|
785
|
+
│ [env var: ASR_OPENAI_MODEL] │
|
|
786
|
+
│ [default: whisper-1] │
|
|
787
|
+
│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │
|
|
788
|
+
│ (e.g., for custom Whisper server: │
|
|
789
|
+
│ http://localhost:9898). │
|
|
790
|
+
│ [env var: ASR_OPENAI_BASE_URL] │
|
|
791
|
+
│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │
|
|
792
|
+
│ [env var: ASR_OPENAI_PROMPT] │
|
|
793
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
794
|
+
╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮
|
|
795
|
+
│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │
|
|
796
|
+
│ [env var: ASR_GEMINI_MODEL] │
|
|
797
|
+
│ [default: gemini-3-flash-preview] │
|
|
798
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
799
|
+
╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
|
|
800
|
+
│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
|
|
801
|
+
│ [env var: LLM_OLLAMA_MODEL] │
|
|
802
|
+
│ [default: gemma3:4b] │
|
|
803
|
+
│ --llm-ollama-host TEXT The Ollama server host. Default is │
|
|
804
|
+
│ http://localhost:11434. │
|
|
805
|
+
│ [env var: LLM_OLLAMA_HOST] │
|
|
806
|
+
│ [default: http://localhost:11434] │
|
|
807
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
808
|
+
╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
|
|
809
|
+
│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
|
|
810
|
+
│ [env var: LLM_OPENAI_MODEL] │
|
|
811
|
+
│ [default: gpt-5-mini] │
|
|
812
|
+
│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
|
|
813
|
+
│ OPENAI_API_KEY environment variable. │
|
|
814
|
+
│ [env var: OPENAI_API_KEY] │
|
|
815
|
+
│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
|
|
816
|
+
│ llama-server: http://localhost:8080/v1). │
|
|
817
|
+
│ [env var: OPENAI_BASE_URL] │
|
|
818
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
819
|
+
╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
|
|
820
|
+
│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
|
|
821
|
+
│ [env var: LLM_GEMINI_MODEL] │
|
|
822
|
+
│ [default: gemini-3-flash-preview] │
|
|
823
|
+
│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
|
|
824
|
+
│ GEMINI_API_KEY environment variable. │
|
|
825
|
+
│ [env var: GEMINI_API_KEY] │
|
|
826
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
827
|
+
╭─ Process Management ───────────────────────────────────────────────────────────────────╮
|
|
828
|
+
│ --stop Stop any running background process. │
|
|
829
|
+
│ --status Check if a background process is running. │
|
|
830
|
+
│ --toggle Toggle the background process on/off. If the process is running, it │
|
|
831
|
+
│ will be stopped. If the process is not running, it will be started. │
|
|
832
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
833
|
+
╭─ General Options ──────────────────────────────────────────────────────────────────────╮
|
|
834
|
+
│ --clipboard --no-clipboard Copy result to │
|
|
835
|
+
│ clipboard. │
|
|
836
|
+
│ [default: clipboard] │
|
|
837
|
+
│ --log-level [debug|info|warning| Set logging level. │
|
|
838
|
+
│ error] [env var: LOG_LEVEL] │
|
|
839
|
+
│ [default: info] │
|
|
840
|
+
│ --log-file TEXT Path to a file to │
|
|
841
|
+
│ write logs to. │
|
|
842
|
+
│ --quiet -q Suppress console │
|
|
843
|
+
│ output from rich. │
|
|
844
|
+
│ --json Output result as JSON │
|
|
845
|
+
│ for automation. │
|
|
846
|
+
│ Implies --quiet and │
|
|
847
|
+
│ --no-clipboard. │
|
|
848
|
+
│ --config TEXT Path to a TOML │
|
|
849
|
+
│ configuration file. │
|
|
850
|
+
│ --print-args Print the command │
|
|
851
|
+
│ line arguments, │
|
|
852
|
+
│ including variables │
|
|
853
|
+
│ taken from the │
|
|
854
|
+
│ configuration file. │
|
|
855
|
+
│ --transcription-log PATH Path to log │
|
|
856
|
+
│ transcription results │
|
|
857
|
+
│ with timestamps, │
|
|
858
|
+
│ hostname, model, and │
|
|
859
|
+
│ raw output. │
|
|
860
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
861
|
+
|
|
862
|
+
```
|
|
863
|
+
|
|
864
|
+
<!-- OUTPUT:END -->
|
|
865
|
+
|
|
866
|
+
</details>
|
|
867
|
+
|
|
868
|
+
### `transcribe-daemon`
|
|
869
|
+
|
|
870
|
+
**Purpose:** A continuous background transcription service that automatically detects and transcribes speech.
|
|
871
|
+
|
|
872
|
+
**Workflow:** Runs as a daemon, listening to your microphone and automatically segmenting speech using voice activity detection (VAD).
|
|
873
|
+
|
|
874
|
+
1. Run the command. It starts listening immediately.
|
|
875
|
+
2. Speak naturally - the daemon detects when you start and stop speaking.
|
|
876
|
+
3. Each speech segment is automatically transcribed and logged.
|
|
877
|
+
4. Optionally, audio is saved as MP3 files for later reference.
|
|
878
|
+
5. Press `Ctrl+C` to stop the daemon.
|
|
879
|
+
|
|
880
|
+
**Installation:** Requires the `vad` extra:
|
|
881
|
+
```bash
|
|
882
|
+
uv tool install "agent-cli[vad]" -p 3.13
|
|
883
|
+
```
|
|
884
|
+
|
|
885
|
+
**How to Use It:**
|
|
886
|
+
|
|
887
|
+
- **Basic Daemon**: `agent-cli transcribe-daemon`
|
|
888
|
+
- **With Custom Role**: `agent-cli transcribe-daemon --role meeting`
|
|
889
|
+
- **With LLM Cleanup**: `agent-cli transcribe-daemon --llm`
|
|
890
|
+
- **Custom Silence Threshold**: `agent-cli transcribe-daemon --silence-threshold 1.5`
|
|
891
|
+
|
|
892
|
+
**Output Files:**
|
|
893
|
+
|
|
894
|
+
- **Transcription Log**: `~/.config/agent-cli/transcriptions.jsonl` (JSON Lines format)
|
|
895
|
+
- **Audio Files**: `~/.config/agent-cli/audio/YYYY/MM/DD/*.mp3`
|
|
896
|
+
|
|
897
|
+
<details>
|
|
898
|
+
<summary>See the output of <code>agent-cli transcribe-daemon --help</code></summary>
|
|
899
|
+
|
|
900
|
+
<!-- CODE:BASH:START -->
|
|
901
|
+
<!-- echo '```yaml' -->
|
|
902
|
+
<!-- export NO_COLOR=1 -->
|
|
903
|
+
<!-- export TERM=dumb -->
|
|
904
|
+
<!-- export COLUMNS=90 -->
|
|
905
|
+
<!-- export TERMINAL_WIDTH=90 -->
|
|
906
|
+
<!-- agent-cli transcribe-daemon --help -->
|
|
907
|
+
<!-- echo '```' -->
|
|
908
|
+
<!-- CODE:END -->
|
|
909
|
+
<!-- OUTPUT:START -->
|
|
910
|
+
<!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
|
|
911
|
+
```yaml
|
|
912
|
+
|
|
913
|
+
Usage: agent-cli transcribe-daemon [OPTIONS]
|
|
914
|
+
|
|
915
|
+
Run a continuous transcription daemon with voice activity detection.
|
|
916
|
+
|
|
917
|
+
This command runs indefinitely, capturing audio from your microphone, detecting speech
|
|
918
|
+
segments using Silero VAD, transcribing them, and logging results with timestamps.
|
|
919
|
+
|
|
920
|
+
Examples: # Basic daemon agent-cli transcribe-daemon
|
|
921
|
+
|
|
922
|
+
|
|
923
|
+
# With role and custom silence threshold
|
|
924
|
+
agent-cli transcribe-daemon --role meeting --silence-threshold 1.5
|
|
925
|
+
|
|
926
|
+
# With LLM cleanup
|
|
927
|
+
agent-cli transcribe-daemon --llm --role notes
|
|
928
|
+
|
|
929
|
+
# Custom log file and audio directory
|
|
930
|
+
agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir ~/audio
|
|
931
|
+
|
|
932
|
+
|
|
933
|
+
╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
|
|
934
|
+
│ --role -r TEXT Role name for logging (e.g., │
|
|
935
|
+
│ 'meeting', 'notes', 'user'). │
|
|
936
|
+
│ [default: user] │
|
|
937
|
+
│ --silence-threshold -s FLOAT Seconds of silence to end a speech │
|
|
938
|
+
│ segment. │
|
|
939
|
+
│ [default: 1.0] │
|
|
940
|
+
│ --min-segment -m FLOAT Minimum speech duration in seconds │
|
|
941
|
+
│ to trigger a segment. │
|
|
942
|
+
│ [default: 0.25] │
|
|
943
|
+
│ --vad-threshold FLOAT VAD speech detection threshold │
|
|
944
|
+
│ (0.0-1.0). Higher = more aggressive │
|
|
945
|
+
│ filtering. │
|
|
946
|
+
│ [default: 0.3] │
|
|
947
|
+
│ --save-audio --no-save-audio Save audio segments as MP3 files. │
|
|
948
|
+
│ [default: save-audio] │
|
|
949
|
+
│ --audio-dir PATH Directory for MP3 files. Default: │
|
|
950
|
+
│ ~/.config/agent-cli/audio │
|
|
951
|
+
│ --transcription-log -t PATH JSON Lines log file path. Default: │
|
|
952
|
+
│ ~/.config/agent-cli/transcriptions… │
|
|
953
|
+
│ --clipboard --no-clipboard Copy each transcription to │
|
|
954
|
+
│ clipboard. │
|
|
955
|
+
│ [default: no-clipboard] │
|
|
956
|
+
│ --help -h Show this message and exit. │
|
|
957
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
958
|
+
╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
|
|
959
|
+
│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │
|
|
960
|
+
│ [env var: ASR_PROVIDER] │
|
|
961
|
+
│ [default: wyoming] │
|
|
962
|
+
│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
|
|
963
|
+
│ [env var: LLM_PROVIDER] │
|
|
964
|
+
│ [default: ollama] │
|
|
965
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
966
|
+
╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
|
|
967
|
+
│ --input-device-index INTEGER Index of the audio input device to use. │
|
|
968
|
+
│ --input-device-name TEXT Device name keywords for partial matching. │
|
|
969
|
+
│ --list-devices List available audio input and output devices and │
|
|
970
|
+
│ exit. │
|
|
971
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
972
|
+
╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮
|
|
973
|
+
│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
|
|
974
|
+
│ [env var: ASR_WYOMING_IP] │
|
|
975
|
+
│ [default: localhost] │
|
|
976
|
+
│ --asr-wyoming-port INTEGER Wyoming ASR server port. │
|
|
977
|
+
│ [env var: ASR_WYOMING_PORT] │
|
|
978
|
+
│ [default: 10300] │
|
|
979
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
980
|
+
╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮
|
|
981
|
+
│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │
|
|
982
|
+
│ [env var: ASR_OPENAI_MODEL] │
|
|
983
|
+
│ [default: whisper-1] │
|
|
984
|
+
│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │
|
|
985
|
+
│ (e.g., for custom Whisper server: │
|
|
986
|
+
│ http://localhost:9898). │
|
|
987
|
+
│ [env var: ASR_OPENAI_BASE_URL] │
|
|
988
|
+
│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │
|
|
989
|
+
│ [env var: ASR_OPENAI_PROMPT] │
|
|
990
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
991
|
+
╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮
|
|
992
|
+
│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │
|
|
993
|
+
│ [env var: ASR_GEMINI_MODEL] │
|
|
994
|
+
│ [default: gemini-3-flash-preview] │
|
|
995
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
996
|
+
╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
|
|
997
|
+
│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
|
|
998
|
+
│ [env var: LLM_OLLAMA_MODEL] │
|
|
999
|
+
│ [default: gemma3:4b] │
|
|
1000
|
+
│ --llm-ollama-host TEXT The Ollama server host. Default is │
|
|
1001
|
+
│ http://localhost:11434. │
|
|
1002
|
+
│ [env var: LLM_OLLAMA_HOST] │
|
|
1003
|
+
│ [default: http://localhost:11434] │
|
|
1004
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1005
|
+
╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
|
|
1006
|
+
│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
|
|
1007
|
+
│ [env var: LLM_OPENAI_MODEL] │
|
|
1008
|
+
│ [default: gpt-5-mini] │
|
|
1009
|
+
│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
|
|
1010
|
+
│ OPENAI_API_KEY environment variable. │
|
|
1011
|
+
│ [env var: OPENAI_API_KEY] │
|
|
1012
|
+
│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
|
|
1013
|
+
│ llama-server: http://localhost:8080/v1). │
|
|
1014
|
+
│ [env var: OPENAI_BASE_URL] │
|
|
1015
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1016
|
+
╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
|
|
1017
|
+
│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
|
|
1018
|
+
│ [env var: LLM_GEMINI_MODEL] │
|
|
1019
|
+
│ [default: gemini-3-flash-preview] │
|
|
1020
|
+
│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
|
|
1021
|
+
│ GEMINI_API_KEY environment variable. │
|
|
1022
|
+
│ [env var: GEMINI_API_KEY] │
|
|
1023
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1024
|
+
╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮
|
|
1025
|
+
│ --llm --no-llm Use an LLM to process the transcript. │
|
|
1026
|
+
│ [default: no-llm] │
|
|
1027
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1028
|
+
╭─ Process Management ───────────────────────────────────────────────────────────────────╮
|
|
1029
|
+
│ --stop Stop any running background process. │
|
|
1030
|
+
│ --status Check if a background process is running. │
|
|
1031
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1032
|
+
╭─ General Options ──────────────────────────────────────────────────────────────────────╮
|
|
1033
|
+
│ --log-level [debug|info|warning|error] Set logging level. │
|
|
1034
|
+
│ [env var: LOG_LEVEL] │
|
|
1035
|
+
│ [default: info] │
|
|
1036
|
+
│ --log-file TEXT Path to a file to write logs to. │
|
|
1037
|
+
│ --quiet -q Suppress console output from rich. │
|
|
1038
|
+
│ --config TEXT Path to a TOML configuration file. │
|
|
1039
|
+
│ --print-args Print the command line arguments, │
|
|
1040
|
+
│ including variables taken from the │
|
|
1041
|
+
│ configuration file. │
|
|
1042
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1043
|
+
|
|
1044
|
+
```
|
|
1045
|
+
|
|
1046
|
+
<!-- OUTPUT:END -->
|
|
1047
|
+
|
|
1048
|
+
</details>
|
|
1049
|
+
|
|
1050
|
+
### `speak`
|
|
1051
|
+
|
|
1052
|
+
**Purpose:** Reads any text out loud.
|
|
1053
|
+
|
|
1054
|
+
**Workflow:** A straightforward text-to-speech utility.
|
|
1055
|
+
|
|
1056
|
+
1. It takes text from a command-line argument or your clipboard.
|
|
1057
|
+
2. It sends the text to the configured TTS provider (by default, a Wyoming TTS server like Piper).
|
|
1058
|
+
3. The generated audio is played through your default speakers.
|
|
1059
|
+
|
|
1060
|
+
**How to Use It:**
|
|
1061
|
+
|
|
1062
|
+
- **Speak from Argument**: `agent-cli speak "Hello, world!"`
|
|
1063
|
+
- **Speak from Clipboard**: `agent-cli speak`
|
|
1064
|
+
- **Save to File**: `agent-cli speak "Hello" --save-file hello.wav`
|
|
1065
|
+
|
|
1066
|
+
<details>
|
|
1067
|
+
<summary>See the output of <code>agent-cli speak --help</code></summary>
|
|
1068
|
+
|
|
1069
|
+
<!-- CODE:BASH:START -->
|
|
1070
|
+
<!-- echo '```yaml' -->
|
|
1071
|
+
<!-- export NO_COLOR=1 -->
|
|
1072
|
+
<!-- export TERM=dumb -->
|
|
1073
|
+
<!-- export COLUMNS=90 -->
|
|
1074
|
+
<!-- export TERMINAL_WIDTH=90 -->
|
|
1075
|
+
<!-- agent-cli speak --help -->
|
|
1076
|
+
<!-- echo '```' -->
|
|
1077
|
+
<!-- CODE:END -->
|
|
1078
|
+
<!-- OUTPUT:START -->
|
|
1079
|
+
<!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
|
|
1080
|
+
```yaml
|
|
1081
|
+
|
|
1082
|
+
Usage: agent-cli speak [OPTIONS] [TEXT]
|
|
1083
|
+
|
|
1084
|
+
Convert text to speech using a Wyoming or OpenAI-compatible TTS server.
|
|
1085
|
+
|
|
1086
|
+
╭─ General Options ──────────────────────────────────────────────────────────────────────╮
|
|
1087
|
+
│ text [TEXT] Text to speak. Reads from clipboard if not provided. │
|
|
1088
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1089
|
+
╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
|
|
1090
|
+
│ --help -h Show this message and exit. │
|
|
1091
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1092
|
+
╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
|
|
1093
|
+
│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │
|
|
1094
|
+
│ 'gemini'). │
|
|
1095
|
+
│ [env var: TTS_PROVIDER] │
|
|
1096
|
+
│ [default: wyoming] │
|
|
1097
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1098
|
+
╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮
|
|
1099
|
+
│ --output-device-index INTEGER Index of the audio output device to use for TTS. │
|
|
1100
|
+
│ --output-device-name TEXT Output device name keywords for partial │
|
|
1101
|
+
│ matching. │
|
|
1102
|
+
│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, 2.0 = │
|
|
1103
|
+
│ twice as fast, 0.5 = half speed). │
|
|
1104
|
+
│ [default: 1.0] │
|
|
1105
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1106
|
+
╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮
|
|
1107
|
+
│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
|
|
1108
|
+
│ [default: localhost] │
|
|
1109
|
+
│ --tts-wyoming-port INTEGER Wyoming TTS server port. │
|
|
1110
|
+
│ [default: 10200] │
|
|
1111
|
+
│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │
|
|
1112
|
+
│ 'en_US-lessac-medium'). │
|
|
1113
|
+
│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │
|
|
1114
|
+
│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
|
|
1115
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1116
|
+
╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮
|
|
1117
|
+
│ --tts-openai-model TEXT The OpenAI model to use for TTS. │
|
|
1118
|
+
│ [default: tts-1] │
|
|
1119
|
+
│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │
|
|
1120
|
+
│ [default: alloy] │
|
|
1121
|
+
│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │
|
|
1122
|
+
│ (e.g., http://localhost:8000/v1 for a proxy). │
|
|
1123
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1124
|
+
╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮
|
|
1125
|
+
│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
|
|
1126
|
+
│ [default: kokoro] │
|
|
1127
|
+
│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
|
|
1128
|
+
│ [default: af_sky] │
|
|
1129
|
+
│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
|
|
1130
|
+
│ [default: http://localhost:8880/v1] │
|
|
1131
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1132
|
+
╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮
|
|
1133
|
+
│ --tts-gemini-model TEXT The Gemini model to use for TTS. │
|
|
1134
|
+
│ [default: gemini-2.5-flash-preview-tts] │
|
|
1135
|
+
│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │
|
|
1136
|
+
│ 'Charon', 'Fenrir'). │
|
|
1137
|
+
│ [default: Kore] │
|
|
1138
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1139
|
+
╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
|
|
1140
|
+
│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
|
|
1141
|
+
│ GEMINI_API_KEY environment variable. │
|
|
1142
|
+
│ [env var: GEMINI_API_KEY] │
|
|
1143
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1144
|
+
╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
|
|
1145
|
+
│ --list-devices List available audio input and output devices and exit. │
|
|
1146
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1147
|
+
╭─ General Options ──────────────────────────────────────────────────────────────────────╮
|
|
1148
|
+
│ --save-file PATH Save TTS response audio to WAV file. │
|
|
1149
|
+
│ --log-level [debug|info|warning|error] Set logging level. │
|
|
1150
|
+
│ [env var: LOG_LEVEL] │
|
|
1151
|
+
│ [default: info] │
|
|
1152
|
+
│ --log-file TEXT Path to a file to write logs to. │
|
|
1153
|
+
│ --quiet -q Suppress console output from rich. │
|
|
1154
|
+
│ --json Output result as JSON for │
|
|
1155
|
+
│ automation. Implies --quiet and │
|
|
1156
|
+
│ --no-clipboard. │
|
|
1157
|
+
│ --config TEXT Path to a TOML configuration file. │
|
|
1158
|
+
│ --print-args Print the command line arguments, │
|
|
1159
|
+
│ including variables taken from the │
|
|
1160
|
+
│ configuration file. │
|
|
1161
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1162
|
+
╭─ Process Management ───────────────────────────────────────────────────────────────────╮
|
|
1163
|
+
│ --stop Stop any running background process. │
|
|
1164
|
+
│ --status Check if a background process is running. │
|
|
1165
|
+
│ --toggle Toggle the background process on/off. If the process is running, it │
|
|
1166
|
+
│ will be stopped. If the process is not running, it will be started. │
|
|
1167
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1168
|
+
|
|
1169
|
+
```
|
|
1170
|
+
|
|
1171
|
+
<!-- OUTPUT:END -->
|
|
1172
|
+
|
|
1173
|
+
</details>
|
|
1174
|
+
|
|
1175
|
+
### `voice-edit`
|
|
1176
|
+
|
|
1177
|
+
**Purpose:** A powerful clipboard assistant that you command with your voice.
|
|
1178
|
+
|
|
1179
|
+
**Workflow:** This agent is designed for a hotkey-driven workflow to act on text you've already copied.
|
|
1180
|
+
|
|
1181
|
+
1. Copy a block of text to your clipboard (e.g., an email draft).
|
|
1182
|
+
2. Press a hotkey to run `agent-cli voice-edit &` in the background. The agent is now listening.
|
|
1183
|
+
3. Speak a command, such as "Make this more formal" or "Summarize the key points."
|
|
1184
|
+
4. Press the same hotkey again, which should trigger `agent-cli voice-edit --stop`.
|
|
1185
|
+
5. The agent transcribes your command, sends it along with the original clipboard text to the LLM, and the LLM performs the action.
|
|
1186
|
+
6. The result is copied back to your clipboard. If `--tts` is enabled, it will also speak the result.
|
|
1187
|
+
|
|
1188
|
+
**How to Use It:** The power of this tool is unlocked with a hotkey manager like Keyboard Maestro (macOS) or AutoHotkey (Windows). See the docstring in `agent_cli/agents/voice_edit.py` for a detailed Keyboard Maestro setup guide.
|
|
1189
|
+
|
|
1190
|
+
<details>
|
|
1191
|
+
<summary>See the output of <code>agent-cli voice-edit --help</code></summary>
|
|
1192
|
+
|
|
1193
|
+
<!-- CODE:BASH:START -->
|
|
1194
|
+
<!-- echo '```yaml' -->
|
|
1195
|
+
<!-- export NO_COLOR=1 -->
|
|
1196
|
+
<!-- export TERM=dumb -->
|
|
1197
|
+
<!-- export COLUMNS=90 -->
|
|
1198
|
+
<!-- export TERMINAL_WIDTH=90 -->
|
|
1199
|
+
<!-- agent-cli voice-edit --help -->
|
|
1200
|
+
<!-- echo '```' -->
|
|
1201
|
+
<!-- CODE:END -->
|
|
1202
|
+
<!-- OUTPUT:START -->
|
|
1203
|
+
<!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
|
|
1204
|
+
```yaml
|
|
1205
|
+
|
|
1206
|
+
Usage: agent-cli voice-edit [OPTIONS]
|
|
1207
|
+
|
|
1208
|
+
Interact with clipboard text via a voice command using local or remote services.
|
|
1209
|
+
|
|
1210
|
+
Usage:
|
|
1211
|
+
|
|
1212
|
+
• Run in foreground: agent-cli voice-edit --input-device-index 1
|
|
1213
|
+
• Run in background: agent-cli voice-edit --input-device-index 1 &
|
|
1214
|
+
• Check status: agent-cli voice-edit --status
|
|
1215
|
+
• Stop background process: agent-cli voice-edit --stop
|
|
1216
|
+
• List audio devices: agent-cli voice-edit --list-devices
|
|
1217
|
+
• Save TTS to file: agent-cli voice-edit --tts --save-file response.wav
|
|
1218
|
+
|
|
1219
|
+
╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
|
|
1220
|
+
│ --help -h Show this message and exit. │
|
|
1221
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1222
|
+
╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
|
|
1223
|
+
│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │
|
|
1224
|
+
│ [env var: ASR_PROVIDER] │
|
|
1225
|
+
│ [default: wyoming] │
|
|
1226
|
+
│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
|
|
1227
|
+
│ [env var: LLM_PROVIDER] │
|
|
1228
|
+
│ [default: ollama] │
|
|
1229
|
+
│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │
|
|
1230
|
+
│ 'gemini'). │
|
|
1231
|
+
│ [env var: TTS_PROVIDER] │
|
|
1232
|
+
│ [default: wyoming] │
|
|
1233
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1234
|
+
╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
|
|
1235
|
+
│ --input-device-index INTEGER Index of the audio input device to use. │
|
|
1236
|
+
│ --input-device-name TEXT Device name keywords for partial matching. │
|
|
1237
|
+
│ --list-devices List available audio input and output devices and │
|
|
1238
|
+
│ exit. │
|
|
1239
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1240
|
+
╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮
|
|
1241
|
+
│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
|
|
1242
|
+
│ [env var: ASR_WYOMING_IP] │
|
|
1243
|
+
│ [default: localhost] │
|
|
1244
|
+
│ --asr-wyoming-port INTEGER Wyoming ASR server port. │
|
|
1245
|
+
│ [env var: ASR_WYOMING_PORT] │
|
|
1246
|
+
│ [default: 10300] │
|
|
1247
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1248
|
+
╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮
|
|
1249
|
+
│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │
|
|
1250
|
+
│ [env var: ASR_OPENAI_MODEL] │
|
|
1251
|
+
│ [default: whisper-1] │
|
|
1252
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1253
|
+
╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮
|
|
1254
|
+
│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │
|
|
1255
|
+
│ [env var: ASR_GEMINI_MODEL] │
|
|
1256
|
+
│ [default: gemini-3-flash-preview] │
|
|
1257
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1258
|
+
╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
|
|
1259
|
+
│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
|
|
1260
|
+
│ [env var: LLM_OLLAMA_MODEL] │
|
|
1261
|
+
│ [default: gemma3:4b] │
|
|
1262
|
+
│ --llm-ollama-host TEXT The Ollama server host. Default is │
|
|
1263
|
+
│ http://localhost:11434. │
|
|
1264
|
+
│ [env var: LLM_OLLAMA_HOST] │
|
|
1265
|
+
│ [default: http://localhost:11434] │
|
|
1266
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1267
|
+
╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
|
|
1268
|
+
│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
|
|
1269
|
+
│ [env var: LLM_OPENAI_MODEL] │
|
|
1270
|
+
│ [default: gpt-5-mini] │
|
|
1271
|
+
│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
|
|
1272
|
+
│ OPENAI_API_KEY environment variable. │
|
|
1273
|
+
│ [env var: OPENAI_API_KEY] │
|
|
1274
|
+
│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
|
|
1275
|
+
│ llama-server: http://localhost:8080/v1). │
|
|
1276
|
+
│ [env var: OPENAI_BASE_URL] │
|
|
1277
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1278
|
+
╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
|
|
1279
|
+
│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
|
|
1280
|
+
│ [env var: LLM_GEMINI_MODEL] │
|
|
1281
|
+
│ [default: gemini-3-flash-preview] │
|
|
1282
|
+
│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
|
|
1283
|
+
│ GEMINI_API_KEY environment variable. │
|
|
1284
|
+
│ [env var: GEMINI_API_KEY] │
|
|
1285
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1286
|
+
╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮
|
|
1287
|
+
│ --tts --no-tts Enable text-to-speech for responses. │
|
|
1288
|
+
│ [default: no-tts] │
|
|
1289
|
+
│ --output-device-index INTEGER Index of the audio output device to use │
|
|
1290
|
+
│ for TTS. │
|
|
1291
|
+
│ --output-device-name TEXT Output device name keywords for partial │
|
|
1292
|
+
│ matching. │
|
|
1293
|
+
│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │
|
|
1294
|
+
│ 2.0 = twice as fast, 0.5 = half speed). │
|
|
1295
|
+
│ [default: 1.0] │
|
|
1296
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1297
|
+
╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮
|
|
1298
|
+
│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
|
|
1299
|
+
│ [default: localhost] │
|
|
1300
|
+
│ --tts-wyoming-port INTEGER Wyoming TTS server port. │
|
|
1301
|
+
│ [default: 10200] │
|
|
1302
|
+
│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │
|
|
1303
|
+
│ 'en_US-lessac-medium'). │
|
|
1304
|
+
│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │
|
|
1305
|
+
│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
|
|
1306
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1307
|
+
╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮
|
|
1308
|
+
│ --tts-openai-model TEXT The OpenAI model to use for TTS. │
|
|
1309
|
+
│ [default: tts-1] │
|
|
1310
|
+
│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │
|
|
1311
|
+
│ [default: alloy] │
|
|
1312
|
+
│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │
|
|
1313
|
+
│ (e.g., http://localhost:8000/v1 for a proxy). │
|
|
1314
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1315
|
+
╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮
|
|
1316
|
+
│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
|
|
1317
|
+
│ [default: kokoro] │
|
|
1318
|
+
│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
|
|
1319
|
+
│ [default: af_sky] │
|
|
1320
|
+
│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
|
|
1321
|
+
│ [default: http://localhost:8880/v1] │
|
|
1322
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1323
|
+
╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮
|
|
1324
|
+
│ --tts-gemini-model TEXT The Gemini model to use for TTS. │
|
|
1325
|
+
│ [default: gemini-2.5-flash-preview-tts] │
|
|
1326
|
+
│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │
|
|
1327
|
+
│ 'Charon', 'Fenrir'). │
|
|
1328
|
+
│ [default: Kore] │
|
|
1329
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1330
|
+
╭─ Process Management ───────────────────────────────────────────────────────────────────╮
|
|
1331
|
+
│ --stop Stop any running background process. │
|
|
1332
|
+
│ --status Check if a background process is running. │
|
|
1333
|
+
│ --toggle Toggle the background process on/off. If the process is running, it │
|
|
1334
|
+
│ will be stopped. If the process is not running, it will be started. │
|
|
1335
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1336
|
+
╭─ General Options ──────────────────────────────────────────────────────────────────────╮
|
|
1337
|
+
│ --save-file PATH Save TTS response audio │
|
|
1338
|
+
│ to WAV file. │
|
|
1339
|
+
│ --clipboard --no-clipboard Copy result to │
|
|
1340
|
+
│ clipboard. │
|
|
1341
|
+
│ [default: clipboard] │
|
|
1342
|
+
│ --log-level [debug|info|warning|erro Set logging level. │
|
|
1343
|
+
│ r] [env var: LOG_LEVEL] │
|
|
1344
|
+
│ [default: info] │
|
|
1345
|
+
│ --log-file TEXT Path to a file to write │
|
|
1346
|
+
│ logs to. │
|
|
1347
|
+
│ --quiet -q Suppress console output │
|
|
1348
|
+
│ from rich. │
|
|
1349
|
+
│ --json Output result as JSON │
|
|
1350
|
+
│ for automation. Implies │
|
|
1351
|
+
│ --quiet and │
|
|
1352
|
+
│ --no-clipboard. │
|
|
1353
|
+
│ --config TEXT Path to a TOML │
|
|
1354
|
+
│ configuration file. │
|
|
1355
|
+
│ --print-args Print the command line │
|
|
1356
|
+
│ arguments, including │
|
|
1357
|
+
│ variables taken from the │
|
|
1358
|
+
│ configuration file. │
|
|
1359
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1360
|
+
|
|
1361
|
+
```
|
|
1362
|
+
|
|
1363
|
+
<!-- OUTPUT:END -->
|
|
1364
|
+
|
|
1365
|
+
</details>
|
|
1366
|
+
|
|
1367
|
+
### `assistant`
|
|
1368
|
+
|
|
1369
|
+
**Purpose:** A hands-free voice assistant that starts and stops recording based on a wake word.
|
|
1370
|
+
|
|
1371
|
+
**Workflow:** This agent continuously listens for a wake word (e.g., "Okay Nabu" for the default `ok_nabu` model).
|
|
1372
|
+
|
|
1373
|
+
1. Run the `assistant` command. It will start listening for the wake word.
|
|
1374
|
+
2. Say the wake word to start recording.
|
|
1375
|
+
3. Speak your command or question.
|
|
1376
|
+
4. Say the wake word again to stop recording.
|
|
1377
|
+
5. The agent transcribes your speech, sends it to the LLM, and gets a response.
|
|
1378
|
+
6. The agent speaks the response back to you and then immediately starts listening for the wake word again.
|
|
1379
|
+
|
|
1380
|
+
**How to Use It:**
|
|
1381
|
+
|
|
1382
|
+
- **Start the agent**: `agent-cli assistant --wake-word "ok_nabu" --input-device-index 1`
|
|
1383
|
+
- **With TTS**: `agent-cli assistant --wake-word "ok_nabu" --tts --tts-wyoming-voice "en_US-lessac-medium"`
|
|
1384
|
+
|
|
1385
|
+
<details>
|
|
1386
|
+
<summary>See the output of <code>agent-cli assistant --help</code></summary>
|
|
1387
|
+
|
|
1388
|
+
<!-- CODE:BASH:START -->
|
|
1389
|
+
<!-- echo '```yaml' -->
|
|
1390
|
+
<!-- export NO_COLOR=1 -->
|
|
1391
|
+
<!-- export TERM=dumb -->
|
|
1392
|
+
<!-- export COLUMNS=90 -->
|
|
1393
|
+
<!-- export TERMINAL_WIDTH=90 -->
|
|
1394
|
+
<!-- agent-cli assistant --help -->
|
|
1395
|
+
<!-- echo '```' -->
|
|
1396
|
+
<!-- CODE:END -->
|
|
1397
|
+
<!-- OUTPUT:START -->
|
|
1398
|
+
<!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
|
|
1399
|
+
```yaml
|
|
1400
|
+
|
|
1401
|
+
Usage: agent-cli assistant [OPTIONS]
|
|
1402
|
+
|
|
1403
|
+
Wake word-based voice assistant using local or remote services.
|
|
1404
|
+
|
|
1405
|
+
╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
|
|
1406
|
+
│ --help -h Show this message and exit. │
|
|
1407
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1408
|
+
╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
|
|
1409
|
+
│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │
|
|
1410
|
+
│ [env var: ASR_PROVIDER] │
|
|
1411
|
+
│ [default: wyoming] │
|
|
1412
|
+
│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
|
|
1413
|
+
│ [env var: LLM_PROVIDER] │
|
|
1414
|
+
│ [default: ollama] │
|
|
1415
|
+
│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │
|
|
1416
|
+
│ 'gemini'). │
|
|
1417
|
+
│ [env var: TTS_PROVIDER] │
|
|
1418
|
+
│ [default: wyoming] │
|
|
1419
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1420
|
+
╭─ Wake Word ────────────────────────────────────────────────────────────────────────────╮
|
|
1421
|
+
│ --wake-server-ip TEXT Wyoming wake word server IP address. │
|
|
1422
|
+
│ [default: localhost] │
|
|
1423
|
+
│ --wake-server-port INTEGER Wyoming wake word server port. │
|
|
1424
|
+
│ [default: 10400] │
|
|
1425
|
+
│ --wake-word TEXT Name of wake word to detect (e.g., 'ok_nabu', │
|
|
1426
|
+
│ 'hey_jarvis'). │
|
|
1427
|
+
│ [default: ok_nabu] │
|
|
1428
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1429
|
+
╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
|
|
1430
|
+
│ --input-device-index INTEGER Index of the audio input device to use. │
|
|
1431
|
+
│ --input-device-name TEXT Device name keywords for partial matching. │
|
|
1432
|
+
│ --list-devices List available audio input and output devices and │
|
|
1433
|
+
│ exit. │
|
|
1434
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1435
|
+
╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮
|
|
1436
|
+
│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
|
|
1437
|
+
│ [env var: ASR_WYOMING_IP] │
|
|
1438
|
+
│ [default: localhost] │
|
|
1439
|
+
│ --asr-wyoming-port INTEGER Wyoming ASR server port. │
|
|
1440
|
+
│ [env var: ASR_WYOMING_PORT] │
|
|
1441
|
+
│ [default: 10300] │
|
|
1442
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1443
|
+
╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮
|
|
1444
|
+
│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │
|
|
1445
|
+
│ [env var: ASR_OPENAI_MODEL] │
|
|
1446
|
+
│ [default: whisper-1] │
|
|
1447
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1448
|
+
╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮
|
|
1449
|
+
│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │
|
|
1450
|
+
│ [env var: ASR_GEMINI_MODEL] │
|
|
1451
|
+
│ [default: gemini-3-flash-preview] │
|
|
1452
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1453
|
+
╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
|
|
1454
|
+
│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
|
|
1455
|
+
│ [env var: LLM_OLLAMA_MODEL] │
|
|
1456
|
+
│ [default: gemma3:4b] │
|
|
1457
|
+
│ --llm-ollama-host TEXT The Ollama server host. Default is │
|
|
1458
|
+
│ http://localhost:11434. │
|
|
1459
|
+
│ [env var: LLM_OLLAMA_HOST] │
|
|
1460
|
+
│ [default: http://localhost:11434] │
|
|
1461
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1462
|
+
╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
|
|
1463
|
+
│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
|
|
1464
|
+
│ [env var: LLM_OPENAI_MODEL] │
|
|
1465
|
+
│ [default: gpt-5-mini] │
|
|
1466
|
+
│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
|
|
1467
|
+
│ OPENAI_API_KEY environment variable. │
|
|
1468
|
+
│ [env var: OPENAI_API_KEY] │
|
|
1469
|
+
│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
|
|
1470
|
+
│ llama-server: http://localhost:8080/v1). │
|
|
1471
|
+
│ [env var: OPENAI_BASE_URL] │
|
|
1472
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1473
|
+
╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
|
|
1474
|
+
│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
|
|
1475
|
+
│ [env var: LLM_GEMINI_MODEL] │
|
|
1476
|
+
│ [default: gemini-3-flash-preview] │
|
|
1477
|
+
│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
|
|
1478
|
+
│ GEMINI_API_KEY environment variable. │
|
|
1479
|
+
│ [env var: GEMINI_API_KEY] │
|
|
1480
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1481
|
+
╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮
|
|
1482
|
+
│ --tts --no-tts Enable text-to-speech for responses. │
|
|
1483
|
+
│ [default: no-tts] │
|
|
1484
|
+
│ --output-device-index INTEGER Index of the audio output device to use │
|
|
1485
|
+
│ for TTS. │
|
|
1486
|
+
│ --output-device-name TEXT Output device name keywords for partial │
|
|
1487
|
+
│ matching. │
|
|
1488
|
+
│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │
|
|
1489
|
+
│ 2.0 = twice as fast, 0.5 = half speed). │
|
|
1490
|
+
│ [default: 1.0] │
|
|
1491
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1492
|
+
╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮
|
|
1493
|
+
│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
|
|
1494
|
+
│ [default: localhost] │
|
|
1495
|
+
│ --tts-wyoming-port INTEGER Wyoming TTS server port. │
|
|
1496
|
+
│ [default: 10200] │
|
|
1497
|
+
│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │
|
|
1498
|
+
│ 'en_US-lessac-medium'). │
|
|
1499
|
+
│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │
|
|
1500
|
+
│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
|
|
1501
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1502
|
+
╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮
|
|
1503
|
+
│ --tts-openai-model TEXT The OpenAI model to use for TTS. │
|
|
1504
|
+
│ [default: tts-1] │
|
|
1505
|
+
│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │
|
|
1506
|
+
│ [default: alloy] │
|
|
1507
|
+
│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │
|
|
1508
|
+
│ (e.g., http://localhost:8000/v1 for a proxy). │
|
|
1509
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1510
|
+
╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮
|
|
1511
|
+
│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
|
|
1512
|
+
│ [default: kokoro] │
|
|
1513
|
+
│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
|
|
1514
|
+
│ [default: af_sky] │
|
|
1515
|
+
│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
|
|
1516
|
+
│ [default: http://localhost:8880/v1] │
|
|
1517
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1518
|
+
╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮
|
|
1519
|
+
│ --tts-gemini-model TEXT The Gemini model to use for TTS. │
|
|
1520
|
+
│ [default: gemini-2.5-flash-preview-tts] │
|
|
1521
|
+
│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │
|
|
1522
|
+
│ 'Charon', 'Fenrir'). │
|
|
1523
|
+
│ [default: Kore] │
|
|
1524
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1525
|
+
╭─ Process Management ───────────────────────────────────────────────────────────────────╮
|
|
1526
|
+
│ --stop Stop any running background process. │
|
|
1527
|
+
│ --status Check if a background process is running. │
|
|
1528
|
+
│ --toggle Toggle the background process on/off. If the process is running, it │
|
|
1529
|
+
│ will be stopped. If the process is not running, it will be started. │
|
|
1530
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1531
|
+
╭─ General Options ──────────────────────────────────────────────────────────────────────╮
|
|
1532
|
+
│ --save-file PATH Save TTS response audio │
|
|
1533
|
+
│ to WAV file. │
|
|
1534
|
+
│ --clipboard --no-clipboard Copy result to │
|
|
1535
|
+
│ clipboard. │
|
|
1536
|
+
│ [default: clipboard] │
|
|
1537
|
+
│ --log-level [debug|info|warning|erro Set logging level. │
|
|
1538
|
+
│ r] [env var: LOG_LEVEL] │
|
|
1539
|
+
│ [default: info] │
|
|
1540
|
+
│ --log-file TEXT Path to a file to write │
|
|
1541
|
+
│ logs to. │
|
|
1542
|
+
│ --quiet -q Suppress console output │
|
|
1543
|
+
│ from rich. │
|
|
1544
|
+
│ --config TEXT Path to a TOML │
|
|
1545
|
+
│ configuration file. │
|
|
1546
|
+
│ --print-args Print the command line │
|
|
1547
|
+
│ arguments, including │
|
|
1548
|
+
│ variables taken from the │
|
|
1549
|
+
│ configuration file. │
|
|
1550
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1551
|
+
|
|
1552
|
+
```
|
|
1553
|
+
|
|
1554
|
+
<!-- OUTPUT:END -->
|
|
1555
|
+
|
|
1556
|
+
</details>
|
|
1557
|
+
|
|
1558
|
+
### `chat`
|
|
1559
|
+
|
|
1560
|
+
**Purpose:** A full-featured, conversational AI assistant that can interact with your system.
|
|
1561
|
+
|
|
1562
|
+
**Workflow:** This is a persistent agent that you can hold an ongoing, spoken conversation with.
|
|
1563
|
+
|
|
1564
|
+
1. Run the `chat` command. It will start listening for your voice.
|
|
1565
|
+
2. Speak your command or question (e.g., "What's in my current directory?").
|
|
1566
|
+
3. The agent transcribes your speech, sends it to the LLM, and gets a response. The LLM can use tools like `read_file` or `execute_code` to answer your question.
|
|
1567
|
+
4. The agent speaks the response back to you and then immediately starts listening for your next command.
|
|
1568
|
+
5. The conversation continues in this loop. Conversation history is saved between sessions.
|
|
1569
|
+
|
|
1570
|
+
**Interaction Model:**
|
|
1571
|
+
|
|
1572
|
+
- **To Interrupt**: Press `Ctrl+C` **once** to stop the agent from either listening or speaking, and it will immediately return to a listening state for a new command. This is useful if it misunderstands you or you want to speak again quickly.
|
|
1573
|
+
- **To Exit**: Press `Ctrl+C` **twice in a row** to terminate the application.
|
|
1574
|
+
|
|
1575
|
+
**How to Use It:**
|
|
1576
|
+
|
|
1577
|
+
- **Start the agent**: `agent-cli chat --input-device-index 1 --tts`
|
|
1578
|
+
- **Have a conversation**:
|
|
1579
|
+
- _You_: "Read the pyproject.toml file and tell me the project version."
|
|
1580
|
+
- _AI_: (Reads file) "The project version is 0.1.0."
|
|
1581
|
+
- _You_: "Thanks!"
|
|
1582
|
+
|
|
1583
|
+
<details>
|
|
1584
|
+
<summary>See the output of <code>agent-cli chat --help</code></summary>
|
|
1585
|
+
|
|
1586
|
+
<!-- CODE:BASH:START -->
|
|
1587
|
+
<!-- echo '```yaml' -->
|
|
1588
|
+
<!-- export NO_COLOR=1 -->
|
|
1589
|
+
<!-- export TERM=dumb -->
|
|
1590
|
+
<!-- export COLUMNS=90 -->
|
|
1591
|
+
<!-- export TERMINAL_WIDTH=90 -->
|
|
1592
|
+
<!-- agent-cli chat --help -->
|
|
1593
|
+
<!-- echo '```' -->
|
|
1594
|
+
<!-- CODE:END -->
|
|
1595
|
+
<!-- OUTPUT:START -->
|
|
1596
|
+
<!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
|
|
1597
|
+
```yaml
|
|
1598
|
+
|
|
1599
|
+
Usage: agent-cli chat [OPTIONS]
|
|
1600
|
+
|
|
1601
|
+
A chat agent that you can talk to.
|
|
1602
|
+
|
|
1603
|
+
╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
|
|
1604
|
+
│ --help -h Show this message and exit. │
|
|
1605
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1606
|
+
╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
|
|
1607
|
+
│ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │
|
|
1608
|
+
│ [env var: ASR_PROVIDER] │
|
|
1609
|
+
│ [default: wyoming] │
|
|
1610
|
+
│ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
|
|
1611
|
+
│ [env var: LLM_PROVIDER] │
|
|
1612
|
+
│ [default: ollama] │
|
|
1613
|
+
│ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │
|
|
1614
|
+
│ 'gemini'). │
|
|
1615
|
+
│ [env var: TTS_PROVIDER] │
|
|
1616
|
+
│ [default: wyoming] │
|
|
1617
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1618
|
+
╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
|
|
1619
|
+
│ --input-device-index INTEGER Index of the audio input device to use. │
|
|
1620
|
+
│ --input-device-name TEXT Device name keywords for partial matching. │
|
|
1621
|
+
│ --list-devices List available audio input and output devices and │
|
|
1622
|
+
│ exit. │
|
|
1623
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1624
|
+
╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮
|
|
1625
|
+
│ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
|
|
1626
|
+
│ [env var: ASR_WYOMING_IP] │
|
|
1627
|
+
│ [default: localhost] │
|
|
1628
|
+
│ --asr-wyoming-port INTEGER Wyoming ASR server port. │
|
|
1629
|
+
│ [env var: ASR_WYOMING_PORT] │
|
|
1630
|
+
│ [default: 10300] │
|
|
1631
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1632
|
+
╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮
|
|
1633
|
+
│ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │
|
|
1634
|
+
│ [env var: ASR_OPENAI_MODEL] │
|
|
1635
|
+
│ [default: whisper-1] │
|
|
1636
|
+
│ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │
|
|
1637
|
+
│ (e.g., for custom Whisper server: │
|
|
1638
|
+
│ http://localhost:9898). │
|
|
1639
|
+
│ [env var: ASR_OPENAI_BASE_URL] │
|
|
1640
|
+
│ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │
|
|
1641
|
+
│ [env var: ASR_OPENAI_PROMPT] │
|
|
1642
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1643
|
+
╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮
|
|
1644
|
+
│ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │
|
|
1645
|
+
│ [env var: ASR_GEMINI_MODEL] │
|
|
1646
|
+
│ [default: gemini-3-flash-preview] │
|
|
1647
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1648
|
+
╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
|
|
1649
|
+
│ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
|
|
1650
|
+
│ [env var: LLM_OLLAMA_MODEL] │
|
|
1651
|
+
│ [default: gemma3:4b] │
|
|
1652
|
+
│ --llm-ollama-host TEXT The Ollama server host. Default is │
|
|
1653
|
+
│ http://localhost:11434. │
|
|
1654
|
+
│ [env var: LLM_OLLAMA_HOST] │
|
|
1655
|
+
│ [default: http://localhost:11434] │
|
|
1656
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1657
|
+
╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
|
|
1658
|
+
│ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
|
|
1659
|
+
│ [env var: LLM_OPENAI_MODEL] │
|
|
1660
|
+
│ [default: gpt-5-mini] │
|
|
1661
|
+
│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
|
|
1662
|
+
│ OPENAI_API_KEY environment variable. │
|
|
1663
|
+
│ [env var: OPENAI_API_KEY] │
|
|
1664
|
+
│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
|
|
1665
|
+
│ llama-server: http://localhost:8080/v1). │
|
|
1666
|
+
│ [env var: OPENAI_BASE_URL] │
|
|
1667
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1668
|
+
╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
|
|
1669
|
+
│ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
|
|
1670
|
+
│ [env var: LLM_GEMINI_MODEL] │
|
|
1671
|
+
│ [default: gemini-3-flash-preview] │
|
|
1672
|
+
│ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
|
|
1673
|
+
│ GEMINI_API_KEY environment variable. │
|
|
1674
|
+
│ [env var: GEMINI_API_KEY] │
|
|
1675
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1676
|
+
╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮
|
|
1677
|
+
│ --tts --no-tts Enable text-to-speech for responses. │
|
|
1678
|
+
│ [default: no-tts] │
|
|
1679
|
+
│ --output-device-index INTEGER Index of the audio output device to use │
|
|
1680
|
+
│ for TTS. │
|
|
1681
|
+
│ --output-device-name TEXT Output device name keywords for partial │
|
|
1682
|
+
│ matching. │
|
|
1683
|
+
│ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │
|
|
1684
|
+
│ 2.0 = twice as fast, 0.5 = half speed). │
|
|
1685
|
+
│ [default: 1.0] │
|
|
1686
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1687
|
+
╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮
|
|
1688
|
+
│ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
|
|
1689
|
+
│ [default: localhost] │
|
|
1690
|
+
│ --tts-wyoming-port INTEGER Wyoming TTS server port. │
|
|
1691
|
+
│ [default: 10200] │
|
|
1692
|
+
│ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │
|
|
1693
|
+
│ 'en_US-lessac-medium'). │
|
|
1694
|
+
│ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │
|
|
1695
|
+
│ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
|
|
1696
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1697
|
+
╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮
|
|
1698
|
+
│ --tts-openai-model TEXT The OpenAI model to use for TTS. │
|
|
1699
|
+
│ [default: tts-1] │
|
|
1700
|
+
│ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │
|
|
1701
|
+
│ [default: alloy] │
|
|
1702
|
+
│ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │
|
|
1703
|
+
│ (e.g., http://localhost:8000/v1 for a proxy). │
|
|
1704
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1705
|
+
╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮
|
|
1706
|
+
│ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
|
|
1707
|
+
│ [default: kokoro] │
|
|
1708
|
+
│ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
|
|
1709
|
+
│ [default: af_sky] │
|
|
1710
|
+
│ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
|
|
1711
|
+
│ [default: http://localhost:8880/v1] │
|
|
1712
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1713
|
+
╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮
|
|
1714
|
+
│ --tts-gemini-model TEXT The Gemini model to use for TTS. │
|
|
1715
|
+
│ [default: gemini-2.5-flash-preview-tts] │
|
|
1716
|
+
│ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │
|
|
1717
|
+
│ 'Charon', 'Fenrir'). │
|
|
1718
|
+
│ [default: Kore] │
|
|
1719
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1720
|
+
╭─ Process Management ───────────────────────────────────────────────────────────────────╮
|
|
1721
|
+
│ --stop Stop any running background process. │
|
|
1722
|
+
│ --status Check if a background process is running. │
|
|
1723
|
+
│ --toggle Toggle the background process on/off. If the process is running, it │
|
|
1724
|
+
│ will be stopped. If the process is not running, it will be started. │
|
|
1725
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1726
|
+
╭─ History Options ──────────────────────────────────────────────────────────────────────╮
|
|
1727
|
+
│ --history-dir PATH Directory to store conversation history. │
|
|
1728
|
+
│ [default: ~/.config/agent-cli/history] │
|
|
1729
|
+
│ --last-n-messages INTEGER Number of messages to include in the conversation │
|
|
1730
|
+
│ history. Set to 0 to disable history. │
|
|
1731
|
+
│ [default: 50] │
|
|
1732
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1733
|
+
╭─ General Options ──────────────────────────────────────────────────────────────────────╮
|
|
1734
|
+
│ --save-file PATH Save TTS response audio to WAV file. │
|
|
1735
|
+
│ --log-level [debug|info|warning|error] Set logging level. │
|
|
1736
|
+
│ [env var: LOG_LEVEL] │
|
|
1737
|
+
│ [default: info] │
|
|
1738
|
+
│ --log-file TEXT Path to a file to write logs to. │
|
|
1739
|
+
│ --quiet -q Suppress console output from rich. │
|
|
1740
|
+
│ --config TEXT Path to a TOML configuration file. │
|
|
1741
|
+
│ --print-args Print the command line arguments, │
|
|
1742
|
+
│ including variables taken from the │
|
|
1743
|
+
│ configuration file. │
|
|
1744
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1745
|
+
|
|
1746
|
+
```
|
|
1747
|
+
|
|
1748
|
+
<!-- OUTPUT:END -->
|
|
1749
|
+
|
|
1750
|
+
</details>
|
|
1751
|
+
|
|
1752
|
+
|
|
1753
|
+
### `rag-proxy`
|
|
1754
|
+
|
|
1755
|
+
**Purpose:** Enables "Chat with your Data" by running a local proxy server that injects document context into LLM requests.
|
|
1756
|
+
|
|
1757
|
+
**Workflow:**
|
|
1758
|
+
|
|
1759
|
+
1. Start the server, pointing it to your documents folder and your local LLM (e.g., Ollama or llama.cpp) or OpenAI.
|
|
1760
|
+
2. The server watches the folder and automatically indexes any text/markdown/PDF files into a local ChromaDB vector store.
|
|
1761
|
+
3. Point any OpenAI-compatible client (including `agent-cli chat`) to this server's URL.
|
|
1762
|
+
4. When you ask a question, the server retrieves relevant document chunks, adds them to the prompt, and forwards it to the LLM.
|
|
1763
|
+
|
|
1764
|
+
**How to Use It:**
|
|
1765
|
+
|
|
1766
|
+
- **Install RAG deps first**: `pip install "agent-cli[rag]"` (or, from the repo, `uv sync --extra rag`)
|
|
1767
|
+
- **Start Server (Local LLM)**: `agent-cli rag-proxy --docs-folder ~/Documents/Notes --openai-base-url http://localhost:11434/v1 --port 8000`
|
|
1768
|
+
- **Start Server (OpenAI)**: `agent-cli rag-proxy --docs-folder ~/Documents/Notes --openai-api-key sk-...`
|
|
1769
|
+
- **Use with Agent-CLI**: `agent-cli chat --openai-base-url http://localhost:8000/v1 --llm-provider openai`
|
|
1770
|
+
|
|
1771
|
+
<details>
|
|
1772
|
+
<summary>See the output of <code>agent-cli rag-proxy --help</code></summary>
|
|
1773
|
+
|
|
1774
|
+
<!-- CODE:BASH:START -->
|
|
1775
|
+
<!-- echo '```yaml' -->
|
|
1776
|
+
<!-- export NO_COLOR=1 -->
|
|
1777
|
+
<!-- export TERM=dumb -->
|
|
1778
|
+
<!-- export COLUMNS=90 -->
|
|
1779
|
+
<!-- export TERMINAL_WIDTH=90 -->
|
|
1780
|
+
<!-- agent-cli rag-proxy --help -->
|
|
1781
|
+
<!-- echo '```' -->
|
|
1782
|
+
<!-- CODE:END -->
|
|
1783
|
+
<!-- OUTPUT:START -->
|
|
1784
|
+
<!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
|
|
1785
|
+
```yaml
|
|
1786
|
+
|
|
1787
|
+
Usage: agent-cli rag-proxy [OPTIONS]
|
|
1788
|
+
|
|
1789
|
+
Start the RAG (Retrieval-Augmented Generation) Proxy Server.
|
|
1790
|
+
|
|
1791
|
+
This server watches a folder for documents, indexes them, and provides an
|
|
1792
|
+
OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp), injecting
|
|
1793
|
+
relevant context from the documents.
|
|
1794
|
+
|
|
1795
|
+
╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
|
|
1796
|
+
│ --help -h Show this message and exit. │
|
|
1797
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1798
|
+
╭─ RAG Configuration ────────────────────────────────────────────────────────────────────╮
|
|
1799
|
+
│ --docs-folder PATH Folder to watch for documents │
|
|
1800
|
+
│ [default: ./rag_docs] │
|
|
1801
|
+
│ --chroma-path PATH Path to ChromaDB persistence directory │
|
|
1802
|
+
│ [default: ./rag_db] │
|
|
1803
|
+
│ --limit INTEGER Number of document chunks to retrieve per │
|
|
1804
|
+
│ query. │
|
|
1805
|
+
│ [default: 3] │
|
|
1806
|
+
│ --rag-tools --no-rag-tools Allow agent to fetch full documents when │
|
|
1807
|
+
│ snippets are insufficient. │
|
|
1808
|
+
│ [default: rag-tools] │
|
|
1809
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1810
|
+
╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
|
|
1811
|
+
│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
|
|
1812
|
+
│ llama-server: http://localhost:8080/v1). │
|
|
1813
|
+
│ [env var: OPENAI_BASE_URL] │
|
|
1814
|
+
│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
|
|
1815
|
+
│ OPENAI_API_KEY environment variable. │
|
|
1816
|
+
│ [env var: OPENAI_API_KEY] │
|
|
1817
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1818
|
+
╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮
|
|
1819
|
+
│ --embedding-model TEXT Embedding model to use for vectorization. │
|
|
1820
|
+
│ [default: text-embedding-3-small] │
|
|
1821
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1822
|
+
╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮
|
|
1823
|
+
│ --host TEXT Host/IP to bind API servers to. │
|
|
1824
|
+
│ [default: 0.0.0.0] │
|
|
1825
|
+
│ --port INTEGER Port to bind to │
|
|
1826
|
+
│ [default: 8000] │
|
|
1827
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1828
|
+
╭─ General Options ──────────────────────────────────────────────────────────────────────╮
|
|
1829
|
+
│ --log-level [debug|info|warning|error] Set logging level. │
|
|
1830
|
+
│ [env var: LOG_LEVEL] │
|
|
1831
|
+
│ [default: info] │
|
|
1832
|
+
│ --config TEXT Path to a TOML configuration file. │
|
|
1833
|
+
│ --print-args Print the command line arguments, │
|
|
1834
|
+
│ including variables taken from the │
|
|
1835
|
+
│ configuration file. │
|
|
1836
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1837
|
+
|
|
1838
|
+
```
|
|
1839
|
+
|
|
1840
|
+
<!-- OUTPUT:END -->
|
|
1841
|
+
|
|
1842
|
+
</details>
|
|
1843
|
+
|
|
1844
|
+
### `memory`
|
|
1845
|
+
|
|
1846
|
+
The `memory proxy` command is the core feature—a middleware server that gives any OpenAI-compatible app long-term memory. Additional subcommands (`memory add`, etc.) help manage the memory store directly.
|
|
1847
|
+
|
|
1848
|
+
#### `memory proxy`
|
|
1849
|
+
|
|
1850
|
+
**Purpose:** Adds long-term conversational memory (self-hosted) to any OpenAI-compatible client.
|
|
1851
|
+
|
|
1852
|
+
**Key Features:**
|
|
1853
|
+
|
|
1854
|
+
- **Simple Markdown Files:** Your memories are stored as human-readable Markdown files, serving as the ultimate source of truth.
|
|
1855
|
+
- **Automatic Version Control:** Built-in Git integration automatically commits changes, giving you a full history of your memory's evolution.
|
|
1856
|
+
- **Lightweight & Local:** Minimal dependencies and runs entirely on your machine.
|
|
1857
|
+
- **Proxy Middleware:** Works transparently with any OpenAI-compatible `/chat/completions` endpoint (OpenAI, Ollama, vLLM).
|
|
1858
|
+
|
|
1859
|
+
**Workflow:**
|
|
1860
|
+
|
|
1861
|
+
- Stores a per-conversation memory collection in Chroma using the same embedding settings as `rag-proxy`; retrieved memories are reranked with a cross-encoder.
|
|
1862
|
+
- For each turn, retrieves the top-k relevant memories (conversation + global) plus a rolling summary and augments the prompt.
|
|
1863
|
+
- After each reply, extracts salient facts and refreshes the running summary (disable with `--no-summarization`).
|
|
1864
|
+
- Enforces a per-conversation cap (`--max-entries`, default 500) and evicts oldest memories first.
|
|
1865
|
+
|
|
1866
|
+
**How to Use It:**
|
|
1867
|
+
|
|
1868
|
+
- **Install memory deps first**: `pip install "agent-cli[memory]"` (or, from the repo, `uv sync --extra memory`)
|
|
1869
|
+
- **Start Server (Local LLM/OpenAI-compatible)**: `agent-cli memory proxy --memory-path ./memory_db --openai-base-url http://localhost:11434/v1 --embedding-model embeddinggemma:300m`
|
|
1870
|
+
- **Use with Agent-CLI**: `agent-cli chat --openai-base-url http://localhost:8100/v1 --llm-provider openai`
|
|
1871
|
+
|
|
1872
|
+
<details>
|
|
1873
|
+
<summary>See the output of <code>agent-cli memory proxy --help</code></summary>
|
|
1874
|
+
|
|
1875
|
+
<!-- CODE:BASH:START -->
|
|
1876
|
+
<!-- echo '```yaml' -->
|
|
1877
|
+
<!-- export NO_COLOR=1 -->
|
|
1878
|
+
<!-- export TERM=dumb -->
|
|
1879
|
+
<!-- export COLUMNS=90 -->
|
|
1880
|
+
<!-- export TERMINAL_WIDTH=90 -->
|
|
1881
|
+
<!-- agent-cli memory proxy --help -->
|
|
1882
|
+
<!-- echo '```' -->
|
|
1883
|
+
<!-- CODE:END -->
|
|
1884
|
+
<!-- OUTPUT:START -->
|
|
1885
|
+
<!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
|
|
1886
|
+
```yaml
|
|
1887
|
+
|
|
1888
|
+
Usage: agent-cli memory proxy [OPTIONS]
|
|
1889
|
+
|
|
1890
|
+
Start the memory-backed chat proxy server.
|
|
1891
|
+
|
|
1892
|
+
This server acts as a middleware between your chat client (e.g., a web UI, CLI, or IDE
|
|
1893
|
+
plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI, Ollama, vLLM).
|
|
1894
|
+
|
|
1895
|
+
Key Features:
|
|
1896
|
+
|
|
1897
|
+
• Simple Markdown Files: Memories are stored as human-readable Markdown files, serving
|
|
1898
|
+
as the ultimate source of truth.
|
|
1899
|
+
• Automatic Version Control: Built-in Git integration automatically commits changes,
|
|
1900
|
+
providing a full history of memory evolution.
|
|
1901
|
+
• Lightweight & Local: Minimal dependencies and runs entirely on your machine.
|
|
1902
|
+
• Proxy Middleware: Works transparently with any OpenAI-compatible /chat/completions
|
|
1903
|
+
endpoint.
|
|
1904
|
+
|
|
1905
|
+
How it works:
|
|
1906
|
+
|
|
1907
|
+
1 Intercepts POST /v1/chat/completions requests.
|
|
1908
|
+
2 Retrieves relevant memories (facts, previous conversations) from a local vector
|
|
1909
|
+
database (ChromaDB) based on the user's query.
|
|
1910
|
+
3 Injects these memories into the system prompt.
|
|
1911
|
+
4 Forwards the augmented request to the real LLM (--openai-base-url).
|
|
1912
|
+
5 Extracts new facts from the conversation in the background and updates the long-term
|
|
1913
|
+
memory store (including handling contradictions).
|
|
1914
|
+
|
|
1915
|
+
Use this to give "long-term memory" to any OpenAI-compatible application. Point your
|
|
1916
|
+
client's base URL to http://localhost:8100/v1.
|
|
1917
|
+
|
|
1918
|
+
╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
|
|
1919
|
+
│ --help -h Show this message and exit. │
|
|
1920
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1921
|
+
╭─ Memory Configuration ─────────────────────────────────────────────────────────────────╮
|
|
1922
|
+
│ --memory-path PATH Path to the memory store (files + │
|
|
1923
|
+
│ derived vector index). │
|
|
1924
|
+
│ [default: ./memory_db] │
|
|
1925
|
+
│ --default-top-k INTEGER Number of memory entries to │
|
|
1926
|
+
│ retrieve per query. │
|
|
1927
|
+
│ [default: 5] │
|
|
1928
|
+
│ --max-entries INTEGER Maximum stored memory entries per │
|
|
1929
|
+
│ conversation (excluding summary). │
|
|
1930
|
+
│ [default: 500] │
|
|
1931
|
+
│ --mmr-lambda FLOAT MMR lambda (0-1): higher favors │
|
|
1932
|
+
│ relevance, lower favors │
|
|
1933
|
+
│ diversity. │
|
|
1934
|
+
│ [default: 0.7] │
|
|
1935
|
+
│ --recency-weight FLOAT Recency score weight (0.0-1.0). │
|
|
1936
|
+
│ Controls freshness vs. relevance. │
|
|
1937
|
+
│ Default 0.2 (20% recency, 80% │
|
|
1938
|
+
│ semantic relevance). │
|
|
1939
|
+
│ [default: 0.2] │
|
|
1940
|
+
│ --score-threshold FLOAT Minimum semantic relevance │
|
|
1941
|
+
│ threshold (0.0-1.0). Memories │
|
|
1942
|
+
│ below this score are discarded to │
|
|
1943
|
+
│ reduce noise. │
|
|
1944
|
+
│ [default: 0.35] │
|
|
1945
|
+
│ --summarization --no-summarization Enable automatic fact extraction │
|
|
1946
|
+
│ and summaries. │
|
|
1947
|
+
│ [default: summarization] │
|
|
1948
|
+
│ --git-versioning --no-git-versioning Enable automatic git commit of │
|
|
1949
|
+
│ memory changes. │
|
|
1950
|
+
│ [default: git-versioning] │
|
|
1951
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1952
|
+
╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
|
|
1953
|
+
│ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
|
|
1954
|
+
│ llama-server: http://localhost:8080/v1). │
|
|
1955
|
+
│ [env var: OPENAI_BASE_URL] │
|
|
1956
|
+
│ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
|
|
1957
|
+
│ OPENAI_API_KEY environment variable. │
|
|
1958
|
+
│ [env var: OPENAI_API_KEY] │
|
|
1959
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1960
|
+
╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮
|
|
1961
|
+
│ --embedding-model TEXT Embedding model to use for vectorization. │
|
|
1962
|
+
│ [default: text-embedding-3-small] │
|
|
1963
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1964
|
+
╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮
|
|
1965
|
+
│ --host TEXT Host/IP to bind API servers to. │
|
|
1966
|
+
│ [default: 0.0.0.0] │
|
|
1967
|
+
│ --port INTEGER Port to bind to │
|
|
1968
|
+
│ [default: 8100] │
|
|
1969
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1970
|
+
╭─ General Options ──────────────────────────────────────────────────────────────────────╮
|
|
1971
|
+
│ --log-level [debug|info|warning|error] Set logging level. │
|
|
1972
|
+
│ [env var: LOG_LEVEL] │
|
|
1973
|
+
│ [default: info] │
|
|
1974
|
+
│ --config TEXT Path to a TOML configuration file. │
|
|
1975
|
+
│ --print-args Print the command line arguments, │
|
|
1976
|
+
│ including variables taken from the │
|
|
1977
|
+
│ configuration file. │
|
|
1978
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
1979
|
+
|
|
1980
|
+
```
|
|
1981
|
+
|
|
1982
|
+
<!-- OUTPUT:END -->
|
|
1983
|
+
|
|
1984
|
+
</details>
|
|
1985
|
+
|
|
1986
|
+
#### `memory add`
|
|
1987
|
+
|
|
1988
|
+
**Purpose:** Directly add memories to the store without LLM extraction. Useful for bulk imports or seeding memories.
|
|
1989
|
+
|
|
1990
|
+
**How to Use It:**
|
|
1991
|
+
|
|
1992
|
+
```bash
|
|
1993
|
+
# Add single memories as arguments
|
|
1994
|
+
agent-cli memory add "User likes coffee" "User lives in Amsterdam"
|
|
1995
|
+
|
|
1996
|
+
# Read from JSON file
|
|
1997
|
+
agent-cli memory add -f memories.json
|
|
1998
|
+
|
|
1999
|
+
# Read from stdin (plain text, one per line)
|
|
2000
|
+
echo "User prefers dark mode" | agent-cli memory add -f -
|
|
2001
|
+
|
|
2002
|
+
# Read JSON from stdin
|
|
2003
|
+
echo '["Fact one", "Fact two"]' | agent-cli memory add -f -
|
|
2004
|
+
|
|
2005
|
+
# Specify conversation ID
|
|
2006
|
+
agent-cli memory add -c work "Project deadline is Friday"
|
|
2007
|
+
```
|
|
2008
|
+
|
|
2009
|
+
<details>
|
|
2010
|
+
<summary>See the output of <code>agent-cli memory add --help</code></summary>
|
|
2011
|
+
|
|
2012
|
+
<!-- CODE:BASH:START -->
|
|
2013
|
+
<!-- echo '```yaml' -->
|
|
2014
|
+
<!-- export NO_COLOR=1 -->
|
|
2015
|
+
<!-- export TERM=dumb -->
|
|
2016
|
+
<!-- export COLUMNS=90 -->
|
|
2017
|
+
<!-- export TERMINAL_WIDTH=90 -->
|
|
2018
|
+
<!-- agent-cli memory add --help -->
|
|
2019
|
+
<!-- echo '```' -->
|
|
2020
|
+
<!-- CODE:END -->
|
|
2021
|
+
<!-- OUTPUT:START -->
|
|
2022
|
+
<!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
|
|
2023
|
+
```yaml
|
|
2024
|
+
|
|
2025
|
+
Usage: agent-cli memory add [OPTIONS] [MEMORIES]...
|
|
2026
|
+
|
|
2027
|
+
Add memories directly without LLM extraction.
|
|
2028
|
+
|
|
2029
|
+
This writes facts directly to the memory store, bypassing the LLM-based fact extraction.
|
|
2030
|
+
Useful for bulk imports or seeding memories.
|
|
2031
|
+
|
|
2032
|
+
The memory proxy file watcher (if running) will auto-index new files. Otherwise, they'll
|
|
2033
|
+
be indexed on next memory proxy startup.
|
|
2034
|
+
|
|
2035
|
+
Examples::
|
|
2036
|
+
|
|
2037
|
+
|
|
2038
|
+
# Add single memories as arguments
|
|
2039
|
+
agent-cli memory add "User likes coffee" "User lives in Amsterdam"
|
|
2040
|
+
|
|
2041
|
+
# Read from JSON file
|
|
2042
|
+
agent-cli memory add -f memories.json
|
|
2043
|
+
|
|
2044
|
+
# Read from stdin (plain text, one per line)
|
|
2045
|
+
echo "User prefers dark mode" | agent-cli memory add -f -
|
|
2046
|
+
|
|
2047
|
+
# Read JSON from stdin
|
|
2048
|
+
echo '["Fact one", "Fact two"]' | agent-cli memory add -f -
|
|
2049
|
+
|
|
2050
|
+
# Specify conversation ID
|
|
2051
|
+
agent-cli memory add -c work "Project deadline is Friday"
|
|
2052
|
+
|
|
2053
|
+
|
|
2054
|
+
╭─ Arguments ────────────────────────────────────────────────────────────────────────────╮
|
|
2055
|
+
│ memories [MEMORIES]... Memories to add. Each argument becomes one fact. │
|
|
2056
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
2057
|
+
╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
|
|
2058
|
+
│ --file -f PATH Read memories from file. Use '-' │
|
|
2059
|
+
│ for stdin. Supports JSON array, │
|
|
2060
|
+
│ JSON object with 'memories' key, │
|
|
2061
|
+
│ or plain text (one per line). │
|
|
2062
|
+
│ --conversation-id -c TEXT Conversation ID to add memories │
|
|
2063
|
+
│ to. │
|
|
2064
|
+
│ [default: default] │
|
|
2065
|
+
│ --memory-path PATH Path to the memory store. │
|
|
2066
|
+
│ [default: ./memory_db] │
|
|
2067
|
+
│ --git-versioning --no-git-versioning Commit changes to git. │
|
|
2068
|
+
│ [default: git-versioning] │
|
|
2069
|
+
│ --help -h Show this message and exit. │
|
|
2070
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
2071
|
+
╭─ General Options ──────────────────────────────────────────────────────────────────────╮
|
|
2072
|
+
│ --quiet -q Suppress console output from rich. │
|
|
2073
|
+
│ --config TEXT Path to a TOML configuration file. │
|
|
2074
|
+
│ --print-args Print the command line arguments, including variables │
|
|
2075
|
+
│ taken from the configuration file. │
|
|
2076
|
+
╰────────────────────────────────────────────────────────────────────────────────────────╯
|
|
2077
|
+
|
|
2078
|
+
```
|
|
2079
|
+
|
|
2080
|
+
<!-- OUTPUT:END -->
|
|
2081
|
+
|
|
2082
|
+
</details>
|
|
2083
|
+
|
|
2084
|
+
## Development
|
|
2085
|
+
|
|
2086
|
+
### Running Tests
|
|
2087
|
+
|
|
2088
|
+
The project uses `pytest` for testing. To run tests using `uv`:
|
|
2089
|
+
|
|
2090
|
+
```bash
|
|
2091
|
+
uv run pytest
|
|
2092
|
+
```
|
|
2093
|
+
|
|
2094
|
+
### Pre-commit Hooks
|
|
2095
|
+
|
|
2096
|
+
This project uses pre-commit hooks (ruff for linting and formatting, mypy for type checking) to maintain code quality. To set them up:
|
|
2097
|
+
|
|
2098
|
+
1. Install pre-commit:
|
|
2099
|
+
|
|
2100
|
+
```bash
|
|
2101
|
+
pip install pre-commit
|
|
2102
|
+
```
|
|
2103
|
+
|
|
2104
|
+
2. Install the hooks:
|
|
2105
|
+
|
|
2106
|
+
```bash
|
|
2107
|
+
pre-commit install
|
|
2108
|
+
```
|
|
2109
|
+
|
|
2110
|
+
Now, the hooks will run automatically before each commit.
|
|
2111
|
+
|
|
2112
|
+
## Contributing
|
|
2113
|
+
|
|
2114
|
+
Contributions are welcome! If you find a bug or have a feature request, please open an issue. If you'd like to contribute code, please fork the repository and submit a pull request.
|
|
2115
|
+
|
|
2116
|
+
## License
|
|
2117
|
+
|
|
2118
|
+
This project is licensed under the MIT License - see the `LICENSE` file for details.
|