agent-cli 0.70.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. agent_cli/__init__.py +5 -0
  2. agent_cli/__main__.py +6 -0
  3. agent_cli/_extras.json +14 -0
  4. agent_cli/_requirements/.gitkeep +0 -0
  5. agent_cli/_requirements/audio.txt +79 -0
  6. agent_cli/_requirements/faster-whisper.txt +215 -0
  7. agent_cli/_requirements/kokoro.txt +425 -0
  8. agent_cli/_requirements/llm.txt +183 -0
  9. agent_cli/_requirements/memory.txt +355 -0
  10. agent_cli/_requirements/mlx-whisper.txt +222 -0
  11. agent_cli/_requirements/piper.txt +176 -0
  12. agent_cli/_requirements/rag.txt +402 -0
  13. agent_cli/_requirements/server.txt +154 -0
  14. agent_cli/_requirements/speed.txt +77 -0
  15. agent_cli/_requirements/vad.txt +155 -0
  16. agent_cli/_requirements/wyoming.txt +71 -0
  17. agent_cli/_tools.py +368 -0
  18. agent_cli/agents/__init__.py +23 -0
  19. agent_cli/agents/_voice_agent_common.py +136 -0
  20. agent_cli/agents/assistant.py +383 -0
  21. agent_cli/agents/autocorrect.py +284 -0
  22. agent_cli/agents/chat.py +496 -0
  23. agent_cli/agents/memory/__init__.py +31 -0
  24. agent_cli/agents/memory/add.py +190 -0
  25. agent_cli/agents/memory/proxy.py +160 -0
  26. agent_cli/agents/rag_proxy.py +128 -0
  27. agent_cli/agents/speak.py +209 -0
  28. agent_cli/agents/transcribe.py +671 -0
  29. agent_cli/agents/transcribe_daemon.py +499 -0
  30. agent_cli/agents/voice_edit.py +291 -0
  31. agent_cli/api.py +22 -0
  32. agent_cli/cli.py +106 -0
  33. agent_cli/config.py +503 -0
  34. agent_cli/config_cmd.py +307 -0
  35. agent_cli/constants.py +27 -0
  36. agent_cli/core/__init__.py +1 -0
  37. agent_cli/core/audio.py +461 -0
  38. agent_cli/core/audio_format.py +299 -0
  39. agent_cli/core/chroma.py +88 -0
  40. agent_cli/core/deps.py +191 -0
  41. agent_cli/core/openai_proxy.py +139 -0
  42. agent_cli/core/process.py +195 -0
  43. agent_cli/core/reranker.py +120 -0
  44. agent_cli/core/sse.py +87 -0
  45. agent_cli/core/transcription_logger.py +70 -0
  46. agent_cli/core/utils.py +526 -0
  47. agent_cli/core/vad.py +175 -0
  48. agent_cli/core/watch.py +65 -0
  49. agent_cli/dev/__init__.py +14 -0
  50. agent_cli/dev/cli.py +1588 -0
  51. agent_cli/dev/coding_agents/__init__.py +19 -0
  52. agent_cli/dev/coding_agents/aider.py +24 -0
  53. agent_cli/dev/coding_agents/base.py +167 -0
  54. agent_cli/dev/coding_agents/claude.py +39 -0
  55. agent_cli/dev/coding_agents/codex.py +24 -0
  56. agent_cli/dev/coding_agents/continue_dev.py +15 -0
  57. agent_cli/dev/coding_agents/copilot.py +24 -0
  58. agent_cli/dev/coding_agents/cursor_agent.py +48 -0
  59. agent_cli/dev/coding_agents/gemini.py +28 -0
  60. agent_cli/dev/coding_agents/opencode.py +15 -0
  61. agent_cli/dev/coding_agents/registry.py +49 -0
  62. agent_cli/dev/editors/__init__.py +19 -0
  63. agent_cli/dev/editors/base.py +89 -0
  64. agent_cli/dev/editors/cursor.py +15 -0
  65. agent_cli/dev/editors/emacs.py +46 -0
  66. agent_cli/dev/editors/jetbrains.py +56 -0
  67. agent_cli/dev/editors/nano.py +31 -0
  68. agent_cli/dev/editors/neovim.py +33 -0
  69. agent_cli/dev/editors/registry.py +59 -0
  70. agent_cli/dev/editors/sublime.py +20 -0
  71. agent_cli/dev/editors/vim.py +42 -0
  72. agent_cli/dev/editors/vscode.py +15 -0
  73. agent_cli/dev/editors/zed.py +20 -0
  74. agent_cli/dev/project.py +568 -0
  75. agent_cli/dev/registry.py +52 -0
  76. agent_cli/dev/skill/SKILL.md +141 -0
  77. agent_cli/dev/skill/examples.md +571 -0
  78. agent_cli/dev/terminals/__init__.py +19 -0
  79. agent_cli/dev/terminals/apple_terminal.py +82 -0
  80. agent_cli/dev/terminals/base.py +56 -0
  81. agent_cli/dev/terminals/gnome.py +51 -0
  82. agent_cli/dev/terminals/iterm2.py +84 -0
  83. agent_cli/dev/terminals/kitty.py +77 -0
  84. agent_cli/dev/terminals/registry.py +48 -0
  85. agent_cli/dev/terminals/tmux.py +58 -0
  86. agent_cli/dev/terminals/warp.py +132 -0
  87. agent_cli/dev/terminals/zellij.py +78 -0
  88. agent_cli/dev/worktree.py +856 -0
  89. agent_cli/docs_gen.py +417 -0
  90. agent_cli/example-config.toml +185 -0
  91. agent_cli/install/__init__.py +5 -0
  92. agent_cli/install/common.py +89 -0
  93. agent_cli/install/extras.py +174 -0
  94. agent_cli/install/hotkeys.py +48 -0
  95. agent_cli/install/services.py +87 -0
  96. agent_cli/memory/__init__.py +7 -0
  97. agent_cli/memory/_files.py +250 -0
  98. agent_cli/memory/_filters.py +63 -0
  99. agent_cli/memory/_git.py +157 -0
  100. agent_cli/memory/_indexer.py +142 -0
  101. agent_cli/memory/_ingest.py +408 -0
  102. agent_cli/memory/_persistence.py +182 -0
  103. agent_cli/memory/_prompt.py +91 -0
  104. agent_cli/memory/_retrieval.py +294 -0
  105. agent_cli/memory/_store.py +169 -0
  106. agent_cli/memory/_streaming.py +44 -0
  107. agent_cli/memory/_tasks.py +48 -0
  108. agent_cli/memory/api.py +113 -0
  109. agent_cli/memory/client.py +272 -0
  110. agent_cli/memory/engine.py +361 -0
  111. agent_cli/memory/entities.py +43 -0
  112. agent_cli/memory/models.py +112 -0
  113. agent_cli/opts.py +433 -0
  114. agent_cli/py.typed +0 -0
  115. agent_cli/rag/__init__.py +3 -0
  116. agent_cli/rag/_indexer.py +67 -0
  117. agent_cli/rag/_indexing.py +226 -0
  118. agent_cli/rag/_prompt.py +30 -0
  119. agent_cli/rag/_retriever.py +156 -0
  120. agent_cli/rag/_store.py +48 -0
  121. agent_cli/rag/_utils.py +218 -0
  122. agent_cli/rag/api.py +175 -0
  123. agent_cli/rag/client.py +299 -0
  124. agent_cli/rag/engine.py +302 -0
  125. agent_cli/rag/models.py +55 -0
  126. agent_cli/scripts/.runtime/.gitkeep +0 -0
  127. agent_cli/scripts/__init__.py +1 -0
  128. agent_cli/scripts/check_plugin_skill_sync.py +50 -0
  129. agent_cli/scripts/linux-hotkeys/README.md +63 -0
  130. agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh +45 -0
  131. agent_cli/scripts/linux-hotkeys/toggle-transcription.sh +58 -0
  132. agent_cli/scripts/linux-hotkeys/toggle-voice-edit.sh +58 -0
  133. agent_cli/scripts/macos-hotkeys/README.md +45 -0
  134. agent_cli/scripts/macos-hotkeys/skhd-config-example +5 -0
  135. agent_cli/scripts/macos-hotkeys/toggle-autocorrect.sh +12 -0
  136. agent_cli/scripts/macos-hotkeys/toggle-transcription.sh +37 -0
  137. agent_cli/scripts/macos-hotkeys/toggle-voice-edit.sh +37 -0
  138. agent_cli/scripts/nvidia-asr-server/README.md +99 -0
  139. agent_cli/scripts/nvidia-asr-server/pyproject.toml +27 -0
  140. agent_cli/scripts/nvidia-asr-server/server.py +255 -0
  141. agent_cli/scripts/nvidia-asr-server/shell.nix +32 -0
  142. agent_cli/scripts/nvidia-asr-server/uv.lock +4654 -0
  143. agent_cli/scripts/run-openwakeword.sh +11 -0
  144. agent_cli/scripts/run-piper-windows.ps1 +30 -0
  145. agent_cli/scripts/run-piper.sh +24 -0
  146. agent_cli/scripts/run-whisper-linux.sh +40 -0
  147. agent_cli/scripts/run-whisper-macos.sh +6 -0
  148. agent_cli/scripts/run-whisper-windows.ps1 +51 -0
  149. agent_cli/scripts/run-whisper.sh +9 -0
  150. agent_cli/scripts/run_faster_whisper_server.py +136 -0
  151. agent_cli/scripts/setup-linux-hotkeys.sh +72 -0
  152. agent_cli/scripts/setup-linux.sh +108 -0
  153. agent_cli/scripts/setup-macos-hotkeys.sh +61 -0
  154. agent_cli/scripts/setup-macos.sh +76 -0
  155. agent_cli/scripts/setup-windows.ps1 +63 -0
  156. agent_cli/scripts/start-all-services-windows.ps1 +53 -0
  157. agent_cli/scripts/start-all-services.sh +178 -0
  158. agent_cli/scripts/sync_extras.py +138 -0
  159. agent_cli/server/__init__.py +3 -0
  160. agent_cli/server/cli.py +721 -0
  161. agent_cli/server/common.py +222 -0
  162. agent_cli/server/model_manager.py +288 -0
  163. agent_cli/server/model_registry.py +225 -0
  164. agent_cli/server/proxy/__init__.py +3 -0
  165. agent_cli/server/proxy/api.py +444 -0
  166. agent_cli/server/streaming.py +67 -0
  167. agent_cli/server/tts/__init__.py +3 -0
  168. agent_cli/server/tts/api.py +335 -0
  169. agent_cli/server/tts/backends/__init__.py +82 -0
  170. agent_cli/server/tts/backends/base.py +139 -0
  171. agent_cli/server/tts/backends/kokoro.py +403 -0
  172. agent_cli/server/tts/backends/piper.py +253 -0
  173. agent_cli/server/tts/model_manager.py +201 -0
  174. agent_cli/server/tts/model_registry.py +28 -0
  175. agent_cli/server/tts/wyoming_handler.py +249 -0
  176. agent_cli/server/whisper/__init__.py +3 -0
  177. agent_cli/server/whisper/api.py +413 -0
  178. agent_cli/server/whisper/backends/__init__.py +89 -0
  179. agent_cli/server/whisper/backends/base.py +97 -0
  180. agent_cli/server/whisper/backends/faster_whisper.py +225 -0
  181. agent_cli/server/whisper/backends/mlx.py +270 -0
  182. agent_cli/server/whisper/languages.py +116 -0
  183. agent_cli/server/whisper/model_manager.py +157 -0
  184. agent_cli/server/whisper/model_registry.py +28 -0
  185. agent_cli/server/whisper/wyoming_handler.py +203 -0
  186. agent_cli/services/__init__.py +343 -0
  187. agent_cli/services/_wyoming_utils.py +64 -0
  188. agent_cli/services/asr.py +506 -0
  189. agent_cli/services/llm.py +228 -0
  190. agent_cli/services/tts.py +450 -0
  191. agent_cli/services/wake_word.py +142 -0
  192. agent_cli-0.70.5.dist-info/METADATA +2118 -0
  193. agent_cli-0.70.5.dist-info/RECORD +196 -0
  194. agent_cli-0.70.5.dist-info/WHEEL +4 -0
  195. agent_cli-0.70.5.dist-info/entry_points.txt +4 -0
  196. agent_cli-0.70.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,2118 @@
1
+ Metadata-Version: 2.4
2
+ Name: agent-cli
3
+ Version: 0.70.5
4
+ Summary: A suite of AI-powered command-line tools for text correction, audio transcription, and voice assistance.
5
+ Project-URL: Homepage, https://github.com/basnijholt/agent-cli
6
+ Author-email: Bas Nijholt <bas@nijho.lt>
7
+ License-File: LICENSE
8
+ Requires-Python: <3.14,>=3.11
9
+ Requires-Dist: dotenv
10
+ Requires-Dist: httpx
11
+ Requires-Dist: psutil; sys_platform == 'win32'
12
+ Requires-Dist: pydantic
13
+ Requires-Dist: pyperclip
14
+ Requires-Dist: rich
15
+ Requires-Dist: setproctitle
16
+ Requires-Dist: typer
17
+ Requires-Dist: typer-slim[standard]
18
+ Provides-Extra: audio
19
+ Requires-Dist: numpy; extra == 'audio'
20
+ Requires-Dist: sounddevice>=0.4.6; extra == 'audio'
21
+ Requires-Dist: wyoming>=1.5.2; extra == 'audio'
22
+ Provides-Extra: dev
23
+ Requires-Dist: markdown-code-runner>=2.7.0; extra == 'dev'
24
+ Requires-Dist: markdown-gfm-admonition; extra == 'dev'
25
+ Requires-Dist: notebook; extra == 'dev'
26
+ Requires-Dist: pre-commit-uv>=4.1.4; extra == 'dev'
27
+ Requires-Dist: pre-commit>=3.0.0; extra == 'dev'
28
+ Requires-Dist: pylint>=3.0.0; extra == 'dev'
29
+ Requires-Dist: pytest-asyncio>=0.20.0; extra == 'dev'
30
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'dev'
31
+ Requires-Dist: pytest-mock; extra == 'dev'
32
+ Requires-Dist: pytest-timeout; extra == 'dev'
33
+ Requires-Dist: pytest>=7.0.0; extra == 'dev'
34
+ Requires-Dist: ruff; extra == 'dev'
35
+ Requires-Dist: versioningit; extra == 'dev'
36
+ Requires-Dist: zensical; extra == 'dev'
37
+ Provides-Extra: faster-whisper
38
+ Requires-Dist: fastapi[standard]; extra == 'faster-whisper'
39
+ Requires-Dist: faster-whisper>=1.0.0; extra == 'faster-whisper'
40
+ Provides-Extra: kokoro
41
+ Requires-Dist: fastapi[standard]; extra == 'kokoro'
42
+ Requires-Dist: huggingface-hub>=0.20.0; extra == 'kokoro'
43
+ Requires-Dist: kokoro>=0.9.0; extra == 'kokoro'
44
+ Requires-Dist: pip; extra == 'kokoro'
45
+ Requires-Dist: soundfile>=0.12.0; extra == 'kokoro'
46
+ Requires-Dist: transformers>=4.40.0; extra == 'kokoro'
47
+ Provides-Extra: llm
48
+ Requires-Dist: pydantic-ai-slim[duckduckgo,google,openai,vertexai]>=0.1.1; extra == 'llm'
49
+ Provides-Extra: memory
50
+ Requires-Dist: chromadb>=0.4.22; extra == 'memory'
51
+ Requires-Dist: fastapi[standard]; extra == 'memory'
52
+ Requires-Dist: huggingface-hub>=0.20.0; extra == 'memory'
53
+ Requires-Dist: onnxruntime>=1.17.0; extra == 'memory'
54
+ Requires-Dist: pyyaml>=6.0.0; extra == 'memory'
55
+ Requires-Dist: transformers>=4.30.0; extra == 'memory'
56
+ Requires-Dist: watchfiles>=0.21.0; extra == 'memory'
57
+ Provides-Extra: mlx-whisper
58
+ Requires-Dist: fastapi[standard]; (sys_platform == 'darwin' and platform_machine == 'arm64') and extra == 'mlx-whisper'
59
+ Requires-Dist: mlx-whisper>=0.4.0; (sys_platform == 'darwin' and platform_machine == 'arm64') and extra == 'mlx-whisper'
60
+ Provides-Extra: piper
61
+ Requires-Dist: fastapi[standard]; extra == 'piper'
62
+ Requires-Dist: piper-tts>=1.2.0; extra == 'piper'
63
+ Provides-Extra: rag
64
+ Requires-Dist: chromadb>=0.4.22; extra == 'rag'
65
+ Requires-Dist: fastapi[standard]; extra == 'rag'
66
+ Requires-Dist: huggingface-hub>=0.20.0; extra == 'rag'
67
+ Requires-Dist: markitdown[docx,pdf,pptx]>=0.1.3; extra == 'rag'
68
+ Requires-Dist: onnxruntime>=1.17.0; extra == 'rag'
69
+ Requires-Dist: transformers>=4.30.0; extra == 'rag'
70
+ Requires-Dist: watchfiles>=0.21.0; extra == 'rag'
71
+ Provides-Extra: server
72
+ Requires-Dist: fastapi[standard]; extra == 'server'
73
+ Provides-Extra: speed
74
+ Requires-Dist: audiostretchy>=1.3.0; extra == 'speed'
75
+ Provides-Extra: test
76
+ Requires-Dist: pytest-asyncio>=0.20.0; extra == 'test'
77
+ Requires-Dist: pytest-cov>=4.0.0; extra == 'test'
78
+ Requires-Dist: pytest-mock; extra == 'test'
79
+ Requires-Dist: pytest-timeout; extra == 'test'
80
+ Requires-Dist: pytest>=7.0.0; extra == 'test'
81
+ Provides-Extra: vad
82
+ Requires-Dist: silero-vad>=5.1; extra == 'vad'
83
+ Provides-Extra: wyoming
84
+ Requires-Dist: wyoming>=1.5.2; extra == 'wyoming'
85
+ Description-Content-Type: text/markdown
86
+
87
+ # Agent CLI
88
+
89
+ <img src="https://raw.githubusercontent.com/basnijholt/agent-cli/refs/heads/main/.github/logo.svg" alt="agent-cli logo" align="right" style="width: 250px;" />
90
+
91
+ `agent-cli` is a collection of **_local-first_**, AI-powered command-line agents that run entirely on your machine.
92
+ It provides a suite of powerful tools for voice and text interaction, designed for privacy, offline capability, and seamless integration with system-wide hotkeys and workflows.
93
+
94
+ > [!TIP]
95
+ > **Short aliases available:** You can use `agent` or `ag` instead of `agent-cli` for convenience.
96
+
97
+ > [!IMPORTANT]
98
+ > **Local and Private by Design**
99
+ > All agents in this tool are designed to run **100% locally**.
100
+ > Your data, whether it's from your clipboard, microphone, or files, is never sent to any cloud API.
101
+ > This ensures your privacy and allows the tools to work completely offline.
102
+ > You can also optionally configure the agents to use OpenAI/Gemini services.
103
+
104
+ <!-- SECTION:why-i-built-this:START -->
105
+ ## Why I built this
106
+
107
+ I got tired of typing long prompts to LLMs. Speaking is faster, so I built this tool to transcribe my voice directly to the clipboard with a hotkey.
108
+
109
+ **What it does:**
110
+
111
+ - Voice transcription to clipboard with system-wide hotkeys (Cmd+Shift+R on macOS)
112
+ - Autocorrect any text from your clipboard
113
+ - Edit clipboard content with voice commands ("make this more formal")
114
+ - Runs locally - no internet required, your audio stays on your machine
115
+ - Works with any app that can copy/paste
116
+
117
+ I use it mostly for the `transcribe` command when working with LLMs. Being able to speak naturally means I can provide more context without the typing fatigue.
118
+
119
+ Since then I have expanded the tool with many more features, all focused on local-first AI agents that integrate seamlessly with your system.
120
+ <!-- SECTION:why-i-built-this:END -->
121
+
122
+ [![A demo video of Agent-CLI showing local AI voice and text tools on a desktop.](https://img.youtube.com/vi/7sBTCgttH48/0.jpg)](https://www.youtube.com/watch?v=7sBTCgttH48 "Agent-CLI: Local AI Voice & Text Tools on Your Desktop (macOS Demo)")
123
+
124
+ *See agent-cli in action: [Watch the demo](https://www.youtube.com/watch?v=7sBTCgttH48)*
125
+
126
+ ## Features
127
+
128
+ - **[`autocorrect`](docs/commands/autocorrect.md)**: Correct grammar and spelling in your text using a local LLM.
129
+ - **[`transcribe`](docs/commands/transcribe.md)**: Transcribe audio from your microphone to clipboard.
130
+ - **[`speak`](docs/commands/speak.md)**: Convert text to speech using a local TTS engine.
131
+ - **[`voice-edit`](docs/commands/voice-edit.md)**: Edit clipboard text with voice commands.
132
+ - **[`assistant`](docs/commands/assistant.md)**: Wake word-based voice assistant.
133
+ - **[`chat`](docs/commands/chat.md)**: Conversational AI with tool-calling capabilities.
134
+ - **[`memory`](docs/commands/memory.md)**: Long-term memory system with `memory proxy` and `memory add`.
135
+ - **[`rag-proxy`](docs/commands/rag-proxy.md)**: RAG proxy server for chatting with your documents.
136
+ - **[`dev`](docs/commands/dev.md)**: Parallel development with git worktrees and AI coding agents.
137
+ - **[`server`](docs/commands/server/index.md)**: Local ASR and TTS servers with dual-protocol (Wyoming & OpenAI), TTL-based memory management, and multi-platform acceleration. Whisper uses MLX on Apple Silicon or Faster Whisper on Linux/CUDA. TTS supports Kokoro (GPU) or Piper (CPU).
138
+ - **[`transcribe-daemon`](docs/commands/transcribe-daemon.md)**: Continuous background transcription with VAD. Install with `uv tool install "agent-cli[vad]" -p 3.13`.
139
+
140
+ ## Quick Start
141
+
142
+ ### Just want the CLI tool?
143
+
144
+ If you already have AI services running (or plan to use OpenAI), simply install:
145
+
146
+ ```bash
147
+ # Using uv (recommended)
148
+ uv tool install agent-cli -p 3.13
149
+
150
+ # Using pip
151
+ pip install agent-cli
152
+ ```
153
+
154
+ > [!NOTE]
155
+ > The `-p 3.13` flag is required because some dependencies (like `onnxruntime`) don't support Python 3.14 yet.
156
+ > See [uv issue #8206](https://github.com/astral-sh/uv/issues/8206) for details.
157
+
158
+ Then use it:
159
+ ```bash
160
+ agent-cli autocorrect "this has an eror"
161
+ ```
162
+
163
+ ### Want automatic setup with everything?
164
+
165
+ We offer two ways to set up agent-cli with all services:
166
+
167
+ #### Option A: Using Shell Scripts (Traditional)
168
+
169
+ ```bash
170
+ # 1. Clone the repository
171
+ git clone https://github.com/basnijholt/agent-cli.git
172
+ cd agent-cli
173
+
174
+ # 2. Run setup (installs all services + agent-cli)
175
+ ./scripts/setup-macos.sh # or setup-linux.sh
176
+
177
+ # 3. Start services
178
+ ./scripts/start-all-services.sh
179
+
180
+ # 4. (Optional) Set up system-wide hotkeys
181
+ ./scripts/setup-macos-hotkeys.sh # or setup-linux-hotkeys.sh
182
+
183
+ # 5. Use it!
184
+ agent-cli autocorrect "this has an eror"
185
+ ```
186
+
187
+ #### Option B: Using CLI Commands (New!)
188
+
189
+ > [!NOTE]
190
+ > `agent-cli` uses `sounddevice` for real-time microphone/voice features.
191
+ > On Linux only, you need to install the system-level PortAudio library (`sudo apt install portaudio19-dev` or your distro's equivalent) **before** you run `uv tool install agent-cli -p 3.13`.
192
+ > On Windows and macOS, this is handled automatically.
193
+
194
+ ```bash
195
+ # 1. Install agent-cli
196
+ uv tool install agent-cli -p 3.13
197
+
198
+ # 2. Install all required services
199
+ agent-cli install-services
200
+
201
+ # 3. Start all services
202
+ agent-cli start-services
203
+
204
+ # 4. (Optional) Set up system-wide hotkeys
205
+ agent-cli install-hotkeys
206
+
207
+ # 5. Use it!
208
+ agent-cli autocorrect "this has an eror"
209
+ ```
210
+
211
+ The setup scripts automatically install:
212
+ - ✅ Package managers (Homebrew/uv) if needed
213
+ - ✅ All AI services (Ollama, Whisper, TTS, etc.)
214
+ - ✅ The `agent-cli` tool
215
+ - ✅ System dependencies
216
+ - ✅ Hotkey managers (if using hotkey scripts)
217
+
218
+ <details><summary><b><u>[ToC]</u></b> 📚</summary>
219
+
220
+ <!-- START doctoc generated TOC please keep comment here to allow auto update -->
221
+ <!-- DON'T EDIT THIS SECTION, INSTEAD RE-RUN doctoc TO UPDATE -->
222
+
223
+ - [Installation](#installation)
224
+ - [Option 1: CLI Tool Only](#option-1-cli-tool-only)
225
+ - [Option 2: Automated Full Setup](#option-2-automated-full-setup)
226
+ - [Step 1: Clone the Repository](#step-1-clone-the-repository)
227
+ - [Step 2: Run the Setup Script](#step-2-run-the-setup-script)
228
+ - [Step 3: Start All Services](#step-3-start-all-services)
229
+ - [Step 4: Test Your Installation](#step-4-test-your-installation)
230
+ - [System Integration](#system-integration)
231
+ - [macOS Hotkeys](#macos-hotkeys)
232
+ - [Linux Hotkeys](#linux-hotkeys)
233
+ - [Claude Code Plugin](#claude-code-plugin)
234
+ - [Prerequisites](#prerequisites)
235
+ - [What You Need to Install Manually](#what-you-need-to-install-manually)
236
+ - [What the Setup Scripts Install for You](#what-the-setup-scripts-install-for-you)
237
+ - [Core Requirements (Auto-installed)](#core-requirements-auto-installed)
238
+ - [AI Services (Auto-installed and configured)](#ai-services-auto-installed-and-configured)
239
+ - [Alternative Cloud Services (Optional)](#alternative-cloud-services-optional)
240
+ - [Alternative Local LLM Servers](#alternative-local-llm-servers)
241
+ - [Usage](#usage)
242
+ - [Installation Commands](#installation-commands)
243
+ - [Installing Optional Extras](#installing-optional-extras)
244
+ - [Configuration](#configuration)
245
+ - [Managing Configuration](#managing-configuration)
246
+ - [Provider Defaults](#provider-defaults)
247
+ - [`autocorrect`](#autocorrect)
248
+ - [`transcribe`](#transcribe)
249
+ - [`transcribe-daemon`](#transcribe-daemon)
250
+ - [`speak`](#speak)
251
+ - [`voice-edit`](#voice-edit)
252
+ - [`assistant`](#assistant)
253
+ - [`chat`](#chat)
254
+ - [`rag-proxy`](#rag-proxy)
255
+ - [`memory`](#memory)
256
+ - [`memory proxy`](#memory-proxy)
257
+ - [`memory add`](#memory-add)
258
+ - [Development](#development)
259
+ - [Running Tests](#running-tests)
260
+ - [Pre-commit Hooks](#pre-commit-hooks)
261
+ - [Contributing](#contributing)
262
+ - [License](#license)
263
+
264
+ <!-- END doctoc generated TOC please keep comment here to allow auto update -->
265
+
266
+ </details>
267
+
268
+
269
+ ## Installation
270
+
271
+ ### Option 1: CLI Tool Only
272
+
273
+ If you already have AI services set up or plan to use cloud services (OpenAI/Gemini):
274
+
275
+ ```bash
276
+ # Using uv (recommended)
277
+ uv tool install agent-cli -p 3.13
278
+
279
+ # Using pip
280
+ pip install agent-cli
281
+ ```
282
+
283
+ ### Option 2: Automated Full Setup
284
+
285
+ For a complete local setup with all AI services:
286
+
287
+ #### Step 1: Clone the Repository
288
+
289
+ ```bash
290
+ git clone https://github.com/basnijholt/agent-cli.git
291
+ cd agent-cli
292
+ ```
293
+
294
+ #### Step 2: Run the Setup Script
295
+
296
+ | Platform | Setup Command | What It Does | Detailed Guide |
297
+ |----------|---------------|--------------|----------------|
298
+ | **🍎 macOS** | `./scripts/setup-macos.sh` | Installs Homebrew (if needed), uv, Ollama, all services, and agent-cli | [macOS Guide](docs/installation/macos.md) |
299
+ | **🐧 Linux** | `./scripts/setup-linux.sh` | Installs uv, Ollama, all services, and agent-cli | [Linux Guide](docs/installation/linux.md) |
300
+ | **❄️ NixOS** | See guide → | Special instructions for NixOS | [NixOS Guide](docs/installation/nixos.md) |
301
+ | **🐳 Docker** | See guide → | Container-based setup (slower) | [Docker Guide](docs/installation/docker.md) |
302
+
303
+ #### Step 3: Start All Services
304
+
305
+ ```bash
306
+ ./scripts/start-all-services.sh
307
+ ```
308
+
309
+ This launches all AI services in a single terminal session using Zellij.
310
+
311
+ #### Step 4: Test Your Installation
312
+
313
+ ```bash
314
+ agent-cli autocorrect "this has an eror"
315
+ # Output: this has an error
316
+ ```
317
+
318
+ > [!NOTE]
319
+ > The setup scripts handle everything automatically. For platform-specific details or troubleshooting, see the [installation guides](docs/installation/).
320
+
321
+ <details><summary><b>Development Installation</b></summary>
322
+
323
+ For contributing or development:
324
+
325
+ ```bash
326
+ git clone https://github.com/basnijholt/agent-cli.git
327
+ cd agent-cli
328
+ uv sync
329
+ source .venv/bin/activate # On Windows: .venv\Scripts\activate
330
+ ```
331
+
332
+ </details>
333
+
334
+ ## System Integration
335
+
336
+ Want system-wide hotkeys? You'll need the repository for the setup scripts:
337
+
338
+ ```bash
339
+ # If you haven't already cloned it
340
+ git clone https://github.com/basnijholt/agent-cli.git
341
+ cd agent-cli
342
+ ```
343
+
344
+ ### macOS Hotkeys
345
+
346
+ ```bash
347
+ ./scripts/setup-macos-hotkeys.sh
348
+ ```
349
+
350
+ This script automatically:
351
+ - ✅ Installs Homebrew if not present
352
+ - ✅ Installs skhd (hotkey daemon) and terminal-notifier
353
+ - ✅ Configures these system-wide hotkeys:
354
+ - **`Cmd+Shift+R`** - Toggle voice transcription
355
+ - **`Cmd+Shift+A`** - Autocorrect clipboard text
356
+ - **`Cmd+Shift+V`** - Voice edit clipboard text
357
+
358
+ > [!NOTE]
359
+ > After setup, you may need to grant Accessibility permissions to skhd in System Settings → Privacy & Security → Accessibility
360
+
361
+ > [!TIP]
362
+ > To keep the “Listening…” indicator visible for the whole recording, open System Settings → Notifications → *terminal-notifier* and set the Alert style to **Persistent** (or choose **Alerts** on older macOS versions).
363
+ > Also enable "Allow notification when mirroring or sharing the display".
364
+ > The hotkey scripts keep only the recording notification pinned; status and result toasts auto-dismiss.
365
+
366
+ ### Linux Hotkeys
367
+
368
+ ```bash
369
+ ./scripts/setup-linux-hotkeys.sh
370
+ ```
371
+
372
+ This script automatically:
373
+ - ✅ Installs notification tools if needed
374
+ - ✅ Provides configuration for your desktop environment
375
+ - ✅ Sets up these hotkeys:
376
+ - **`Super+Shift+R`** - Toggle voice transcription
377
+ - **`Super+Shift+A`** - Autocorrect clipboard text
378
+ - **`Super+Shift+V`** - Voice edit clipboard text
379
+
380
+ The script supports Hyprland, GNOME, KDE, Sway, i3, XFCE, and provides instructions for manual configuration on other environments.
381
+
382
+ ### Claude Code Plugin
383
+
384
+ The [`dev`](docs/commands/dev.md) command is also available as a **Claude Code plugin**, enabling Claude to automatically spawn parallel AI agents in isolated git worktrees when you ask it to work on multiple features.
385
+
386
+ ```bash
387
+ # Option 1: Install skill directly in your project (recommended)
388
+ agent-cli dev install-skill
389
+
390
+ # Option 2: Install via Claude Code plugin marketplace
391
+ claude plugin marketplace add basnijholt/agent-cli
392
+ claude plugin install agent-cli@agent-cli-dev
393
+ ```
394
+
395
+ Once installed, Claude Code can automatically use this skill when you ask to:
396
+ - "Work on these 3 features in parallel"
397
+ - "Spawn agents for auth and payments"
398
+ - "Delegate this refactoring to a separate agent"
399
+
400
+ See the [plugin documentation](.claude-plugin/README.md) for more details.
401
+
402
+ ## Prerequisites
403
+
404
+ ### What You Need to Install Manually
405
+
406
+ The only thing you need to have installed is **Git** to clone this repository. Everything else is handled automatically!
407
+
408
+ ### What the Setup Scripts Install for You
409
+
410
+ Our installation scripts automatically handle all dependencies:
411
+
412
+ #### Core Requirements (Auto-installed)
413
+ - 🍺 **Homebrew** (macOS) - Installed if not present
414
+ - 🐍 **uv** - Python package manager - Installed automatically
415
+ - 📋 **Clipboard Tools** - Pre-installed on macOS, handled on Linux
416
+
417
+ #### AI Services (Auto-installed and configured)
418
+
419
+ | Service | Purpose | Auto-installed? |
420
+ |---------|---------|-----------------|
421
+ | **[Ollama](https://ollama.ai/)** | Local LLM for text processing | ✅ Yes, with default model |
422
+ | **[Wyoming Faster Whisper](https://github.com/rhasspy/wyoming-faster-whisper)** | Speech-to-text | ✅ Yes, via `uvx` |
423
+ | **[`agent-cli server whisper`](docs/commands/server/whisper.md)** | Speech-to-text (alternative) | ✅ Built-in, `pip install "agent-cli[faster-whisper]"` (Linux/CUDA) or `pip install "agent-cli[mlx-whisper]"` (Apple Silicon) |
424
+ | **[Wyoming Piper](https://github.com/rhasspy/wyoming-piper)** | Text-to-speech | ✅ Yes, via `uvx` |
425
+ | **[Kokoro-FastAPI](https://github.com/remsky/Kokoro-FastAPI)** | Premium TTS (optional) | ⚙️ Can be added later |
426
+ | **[Wyoming openWakeWord](https://github.com/rhasspy/wyoming-openwakeword)** | Wake word detection | ✅ Yes, for `assistant` |
427
+
428
+ > **Why `agent-cli server whisper`?** The built-in Whisper server offers an OpenAI-compatible API (drop-in replacement), Wyoming protocol for Home Assistant, TTL-based VRAM management (auto-unloads idle models), and auto-selects the optimal backend ([MLX](https://github.com/ml-explore/mlx-examples/tree/main/whisper) on Apple Silicon, [faster-whisper](https://github.com/SYSTRAN/faster-whisper) on Linux/CUDA). Docker images available at `ghcr.io/basnijholt/agent-cli-whisper`.
429
+
430
+ #### Alternative Cloud Services (Optional)
431
+
432
+ If you prefer cloud services over local ones:
433
+
434
+ | Service | Purpose | Setup Required |
435
+ |---------|---------|----------------|
436
+ | **OpenAI** | LLM, Speech-to-text, TTS | API key in config |
437
+ | **Gemini** | LLM alternative | API key in config |
438
+
439
+ #### Alternative Local LLM Servers
440
+
441
+ You can also use other OpenAI-compatible local servers:
442
+
443
+ | Server | Purpose | Setup Required |
444
+ |---------|---------|----------------|
445
+ | **llama.cpp** | Local LLM inference | Use `--openai-base-url http://localhost:8080/v1` |
446
+ | **vLLM** | High-performance LLM serving | Use `--openai-base-url` with server endpoint |
447
+ | **Ollama** | Default local LLM | Already configured as default |
448
+
449
+ ## Usage
450
+
451
+ This package provides multiple command-line tools, each designed for a specific purpose.
452
+
453
+ ### Installation Commands
454
+
455
+ These commands help you set up `agent-cli` and its required services:
456
+
457
+ - **`install-services`**: Install all required AI services (Ollama, Whisper, Piper, OpenWakeWord)
458
+ - **`install-hotkeys`**: Set up system-wide hotkeys for quick access to agent-cli features
459
+ - **`install-extras`**: Install optional Python dependencies (rag, memory, vad, etc.) with pinned versions
460
+ - **`start-services`**: Start all services in a Zellij terminal session
461
+
462
+ All necessary scripts are bundled with the package, so you can run these commands immediately after installing `agent-cli`.
463
+
464
+ #### Installing Optional Extras
465
+
466
+ Some features require additional Python dependencies. By default, **agent-cli will auto-install missing extras** when you run a command that needs them. To disable this, set `AGENT_CLI_NO_AUTO_INSTALL=1` or add to your config file:
467
+
468
+ ```toml
469
+ [settings]
470
+ auto_install_extras = false
471
+ ```
472
+
473
+ You can also manually install extras with `install-extras`:
474
+
475
+ ```bash
476
+ # List available extras
477
+ agent-cli install-extras --list
478
+
479
+ # Install specific extras
480
+ agent-cli install-extras rag memory vad
481
+ ```
482
+
483
+ <details>
484
+ <summary>See the output of <code>agent-cli install-extras --help</code></summary>
485
+
486
+ <!-- CODE:BASH:START -->
487
+ <!-- echo '```yaml' -->
488
+ <!-- export NO_COLOR=1 -->
489
+ <!-- export TERM=dumb -->
490
+ <!-- export COLUMNS=90 -->
491
+ <!-- export TERMINAL_WIDTH=90 -->
492
+ <!-- agent-cli install-extras --help -->
493
+ <!-- echo '```' -->
494
+ <!-- CODE:END -->
495
+ <!-- OUTPUT:START -->
496
+ <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
497
+ ```yaml
498
+
499
+ Usage: agent-cli install-extras [OPTIONS] [EXTRAS]...
500
+
501
+ Install optional extras (rag, memory, vad, etc.) with pinned versions.
502
+
503
+ Examples:
504
+
505
+ • agent-cli install-extras rag # Install RAG dependencies
506
+ • agent-cli install-extras memory vad # Install multiple extras
507
+ • agent-cli install-extras --list # Show available extras
508
+ • agent-cli install-extras --all # Install all extras
509
+
510
+ ╭─ Arguments ────────────────────────────────────────────────────────────────────────────╮
511
+ │ extras [EXTRAS]... Extras to install │
512
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
513
+ ╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
514
+ │ --list -l List available extras │
515
+ │ --all -a Install all available extras │
516
+ │ --help -h Show this message and exit. │
517
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
518
+
519
+ ```
520
+
521
+ <!-- OUTPUT:END -->
522
+
523
+ </details>
524
+
525
+ ### Configuration
526
+
527
+ All `agent-cli` commands can be configured using a TOML file. The configuration file is searched for in the following locations, in order:
528
+
529
+ 1. `./agent-cli-config.toml` (in the current directory)
530
+ 2. `~/.config/agent-cli/config.toml`
531
+
532
+ You can also specify a path to a configuration file using the `--config` option, e.g., `agent-cli transcribe --config /path/to/your/config.toml`.
533
+
534
+ Command-line options always take precedence over settings in the configuration file.
535
+
536
+ #### Managing Configuration
537
+
538
+ Use the `config` command to manage your configuration files:
539
+
540
+ ```bash
541
+ # Create a new config file with all options (commented out as a template)
542
+ agent-cli config init
543
+
544
+ # View your current config (syntax highlighted)
545
+ agent-cli config show
546
+
547
+ # View config as raw text (for copy-paste)
548
+ agent-cli config show --raw
549
+
550
+ # Open config in your editor ($EDITOR, or nano/vim)
551
+ agent-cli config edit
552
+ ```
553
+
554
+ <details>
555
+ <summary>See the output of <code>agent-cli config --help</code></summary>
556
+
557
+ <!-- CODE:BASH:START -->
558
+ <!-- echo '```yaml' -->
559
+ <!-- export NO_COLOR=1 -->
560
+ <!-- export TERM=dumb -->
561
+ <!-- export COLUMNS=90 -->
562
+ <!-- export TERMINAL_WIDTH=90 -->
563
+ <!-- agent-cli config --help -->
564
+ <!-- echo '```' -->
565
+ <!-- CODE:END -->
566
+ <!-- OUTPUT:START -->
567
+ <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
568
+ ```yaml
569
+
570
+ Usage: agent-cli config [OPTIONS] COMMAND [ARGS]...
571
+
572
+ Manage agent-cli configuration files.
573
+
574
+ ╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
575
+ │ --help -h Show this message and exit. │
576
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
577
+ ╭─ Commands ─────────────────────────────────────────────────────────────────────────────╮
578
+ │ init Create a new config file with all options commented out. │
579
+ │ edit Open the config file in your default editor. │
580
+ │ show Display the config file location and contents. │
581
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
582
+
583
+ ```
584
+
585
+ <!-- OUTPUT:END -->
586
+
587
+ </details>
588
+
589
+ An example configuration file is also provided in [`example.agent-cli-config.toml`](./example.agent-cli-config.toml).
590
+
591
+ #### Provider Defaults
592
+
593
+ You can choose local or cloud services per capability by setting provider keys in
594
+ the `[defaults]` section of your configuration file.
595
+
596
+ ```toml
597
+ [defaults]
598
+ # llm_provider = "ollama" # 'ollama', 'openai', or 'gemini'
599
+ # asr_provider = "wyoming" # 'wyoming' or 'openai'
600
+ # tts_provider = "wyoming" # 'wyoming', 'openai', or 'kokoro'
601
+ # openai_api_key = "sk-..."
602
+ # gemini_api_key = "..."
603
+ ```
604
+
605
+ ### `autocorrect`
606
+
607
+ **Purpose:** Quickly fix spelling and grammar in any text you've copied.
608
+
609
+ **Workflow:** This is a simple, one-shot command.
610
+
611
+ 1. It reads text from your system clipboard (or from a direct argument).
612
+ 2. It sends the text to the configured LLM (a local Ollama model by default) with a prompt to perform only technical corrections.
613
+ 3. The corrected text is copied back to your clipboard, replacing the original.
614
+
615
+ **How to Use It:** This tool is ideal for integrating with a system-wide hotkey.
616
+
617
+ - **From Clipboard**: `agent-cli autocorrect`
618
+ - **From Argument**: `agent-cli autocorrect "this text has an eror"`
619
+
620
+ <details>
621
+ <summary>See the output of <code>agent-cli autocorrect --help</code></summary>
622
+
623
+ <!-- CODE:BASH:START -->
624
+ <!-- echo '```yaml' -->
625
+ <!-- export NO_COLOR=1 -->
626
+ <!-- export TERM=dumb -->
627
+ <!-- export COLUMNS=90 -->
628
+ <!-- export TERMINAL_WIDTH=90 -->
629
+ <!-- agent-cli autocorrect --help -->
630
+ <!-- echo '```' -->
631
+ <!-- CODE:END -->
632
+ <!-- OUTPUT:START -->
633
+ <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
634
+ ```yaml
635
+
636
+ Usage: agent-cli autocorrect [OPTIONS] [TEXT]
637
+
638
+ Correct text from clipboard using a local or remote LLM.
639
+
640
+ ╭─ General Options ──────────────────────────────────────────────────────────────────────╮
641
+ │ text [TEXT] The text to correct. If not provided, reads from clipboard. │
642
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
643
+ ╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
644
+ │ --help -h Show this message and exit. │
645
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
646
+ ╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
647
+ │ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
648
+ │ [env var: LLM_PROVIDER] │
649
+ │ [default: ollama] │
650
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
651
+ ╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
652
+ │ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
653
+ │ [env var: LLM_OLLAMA_MODEL] │
654
+ │ [default: gemma3:4b] │
655
+ │ --llm-ollama-host TEXT The Ollama server host. Default is │
656
+ │ http://localhost:11434. │
657
+ │ [env var: LLM_OLLAMA_HOST] │
658
+ │ [default: http://localhost:11434] │
659
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
660
+ ╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
661
+ │ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
662
+ │ [env var: LLM_OPENAI_MODEL] │
663
+ │ [default: gpt-5-mini] │
664
+ │ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
665
+ │ OPENAI_API_KEY environment variable. │
666
+ │ [env var: OPENAI_API_KEY] │
667
+ │ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
668
+ │ llama-server: http://localhost:8080/v1). │
669
+ │ [env var: OPENAI_BASE_URL] │
670
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
671
+ ╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
672
+ │ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
673
+ │ [env var: LLM_GEMINI_MODEL] │
674
+ │ [default: gemini-3-flash-preview] │
675
+ │ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
676
+ │ GEMINI_API_KEY environment variable. │
677
+ │ [env var: GEMINI_API_KEY] │
678
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
679
+ ╭─ General Options ──────────────────────────────────────────────────────────────────────╮
680
+ │ --log-level [debug|info|warning|error] Set logging level. │
681
+ │ [env var: LOG_LEVEL] │
682
+ │ [default: info] │
683
+ │ --log-file TEXT Path to a file to write logs to. │
684
+ │ --quiet -q Suppress console output from rich. │
685
+ │ --json Output result as JSON for │
686
+ │ automation. Implies --quiet and │
687
+ │ --no-clipboard. │
688
+ │ --config TEXT Path to a TOML configuration file. │
689
+ │ --print-args Print the command line arguments, │
690
+ │ including variables taken from the │
691
+ │ configuration file. │
692
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
693
+
694
+ ```
695
+
696
+ <!-- OUTPUT:END -->
697
+
698
+ </details>
699
+
700
+ ### `transcribe`
701
+
702
+ **Purpose:** A simple tool to turn your speech into text.
703
+
704
+ **Workflow:** This agent listens to your microphone and converts your speech to text in real-time.
705
+
706
+ 1. Run the command. It will start listening immediately.
707
+ 2. Speak into your microphone.
708
+ 3. Press `Ctrl+C` to stop recording.
709
+ 4. The transcribed text is copied to your clipboard.
710
+ 5. Optionally, use the `--llm` flag to have an LLM (an Ollama model by default) clean up the raw transcript (fixing punctuation, etc.).
711
+
712
+ **How to Use It:**
713
+
714
+ - **Simple Transcription**: `agent-cli transcribe --input-device-index 1`
715
+ - **With LLM Cleanup**: `agent-cli transcribe --input-device-index 1 --llm`
716
+
717
+ <details>
718
+ <summary>See the output of <code>agent-cli transcribe --help</code></summary>
719
+
720
+ <!-- CODE:BASH:START -->
721
+ <!-- echo '```yaml' -->
722
+ <!-- export NO_COLOR=1 -->
723
+ <!-- export TERM=dumb -->
724
+ <!-- export COLUMNS=90 -->
725
+ <!-- export TERMINAL_WIDTH=90 -->
726
+ <!-- agent-cli transcribe --help -->
727
+ <!-- echo '```' -->
728
+ <!-- CODE:END -->
729
+ <!-- OUTPUT:START -->
730
+ <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
731
+ ```yaml
732
+
733
+ Usage: agent-cli transcribe [OPTIONS]
734
+
735
+ Wyoming ASR Client for streaming microphone audio to a transcription server.
736
+
737
+ ╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
738
+ │ --help -h Show this message and exit. │
739
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
740
+ ╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮
741
+ │ --extra-instructions TEXT Additional instructions for the LLM to │
742
+ │ process the transcription. │
743
+ │ --llm --no-llm Use an LLM to process the transcript. │
744
+ │ [default: no-llm] │
745
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
746
+ ╭─ Audio Recovery ───────────────────────────────────────────────────────────────────────╮
747
+ │ --from-file PATH Transcribe audio from a file │
748
+ │ (supports wav, mp3, m4a, ogg, │
749
+ │ flac, aac, webm). Requires ffmpeg │
750
+ │ for non-WAV formats with Wyoming │
751
+ │ provider. │
752
+ │ --last-recording INTEGER Transcribe a saved recording. Use │
753
+ │ 1 for most recent, 2 for │
754
+ │ second-to-last, etc. Use 0 to │
755
+ │ disable (default). │
756
+ │ [default: 0] │
757
+ │ --save-recording --no-save-recording Save the audio recording to disk │
758
+ │ for recovery. │
759
+ │ [default: save-recording] │
760
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
761
+ ╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
762
+ │ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │
763
+ │ [env var: ASR_PROVIDER] │
764
+ │ [default: wyoming] │
765
+ │ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
766
+ │ [env var: LLM_PROVIDER] │
767
+ │ [default: ollama] │
768
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
769
+ ╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
770
+ │ --input-device-index INTEGER Index of the audio input device to use. │
771
+ │ --input-device-name TEXT Device name keywords for partial matching. │
772
+ │ --list-devices List available audio input and output devices and │
773
+ │ exit. │
774
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
775
+ ╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮
776
+ │ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
777
+ │ [env var: ASR_WYOMING_IP] │
778
+ │ [default: localhost] │
779
+ │ --asr-wyoming-port INTEGER Wyoming ASR server port. │
780
+ │ [env var: ASR_WYOMING_PORT] │
781
+ │ [default: 10300] │
782
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
783
+ ╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮
784
+ │ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │
785
+ │ [env var: ASR_OPENAI_MODEL] │
786
+ │ [default: whisper-1] │
787
+ │ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │
788
+ │ (e.g., for custom Whisper server: │
789
+ │ http://localhost:9898). │
790
+ │ [env var: ASR_OPENAI_BASE_URL] │
791
+ │ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │
792
+ │ [env var: ASR_OPENAI_PROMPT] │
793
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
794
+ ╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮
795
+ │ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │
796
+ │ [env var: ASR_GEMINI_MODEL] │
797
+ │ [default: gemini-3-flash-preview] │
798
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
799
+ ╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
800
+ │ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
801
+ │ [env var: LLM_OLLAMA_MODEL] │
802
+ │ [default: gemma3:4b] │
803
+ │ --llm-ollama-host TEXT The Ollama server host. Default is │
804
+ │ http://localhost:11434. │
805
+ │ [env var: LLM_OLLAMA_HOST] │
806
+ │ [default: http://localhost:11434] │
807
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
808
+ ╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
809
+ │ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
810
+ │ [env var: LLM_OPENAI_MODEL] │
811
+ │ [default: gpt-5-mini] │
812
+ │ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
813
+ │ OPENAI_API_KEY environment variable. │
814
+ │ [env var: OPENAI_API_KEY] │
815
+ │ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
816
+ │ llama-server: http://localhost:8080/v1). │
817
+ │ [env var: OPENAI_BASE_URL] │
818
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
819
+ ╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
820
+ │ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
821
+ │ [env var: LLM_GEMINI_MODEL] │
822
+ │ [default: gemini-3-flash-preview] │
823
+ │ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
824
+ │ GEMINI_API_KEY environment variable. │
825
+ │ [env var: GEMINI_API_KEY] │
826
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
827
+ ╭─ Process Management ───────────────────────────────────────────────────────────────────╮
828
+ │ --stop Stop any running background process. │
829
+ │ --status Check if a background process is running. │
830
+ │ --toggle Toggle the background process on/off. If the process is running, it │
831
+ │ will be stopped. If the process is not running, it will be started. │
832
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
833
+ ╭─ General Options ──────────────────────────────────────────────────────────────────────╮
834
+ │ --clipboard --no-clipboard Copy result to │
835
+ │ clipboard. │
836
+ │ [default: clipboard] │
837
+ │ --log-level [debug|info|warning| Set logging level. │
838
+ │ error] [env var: LOG_LEVEL] │
839
+ │ [default: info] │
840
+ │ --log-file TEXT Path to a file to │
841
+ │ write logs to. │
842
+ │ --quiet -q Suppress console │
843
+ │ output from rich. │
844
+ │ --json Output result as JSON │
845
+ │ for automation. │
846
+ │ Implies --quiet and │
847
+ │ --no-clipboard. │
848
+ │ --config TEXT Path to a TOML │
849
+ │ configuration file. │
850
+ │ --print-args Print the command │
851
+ │ line arguments, │
852
+ │ including variables │
853
+ │ taken from the │
854
+ │ configuration file. │
855
+ │ --transcription-log PATH Path to log │
856
+ │ transcription results │
857
+ │ with timestamps, │
858
+ │ hostname, model, and │
859
+ │ raw output. │
860
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
861
+
862
+ ```
863
+
864
+ <!-- OUTPUT:END -->
865
+
866
+ </details>
867
+
868
+ ### `transcribe-daemon`
869
+
870
+ **Purpose:** A continuous background transcription service that automatically detects and transcribes speech.
871
+
872
+ **Workflow:** Runs as a daemon, listening to your microphone and automatically segmenting speech using voice activity detection (VAD).
873
+
874
+ 1. Run the command. It starts listening immediately.
875
+ 2. Speak naturally — the daemon detects when you start and stop speaking.
876
+ 3. Each speech segment is automatically transcribed and logged.
877
+ 4. Optionally, audio is saved as MP3 files for later reference.
878
+ 5. Press `Ctrl+C` to stop the daemon.
879
+
880
+ **Installation:** Requires the `vad` extra:
881
+ ```bash
882
+ uv tool install "agent-cli[vad]" -p 3.13
883
+ ```
884
+
885
+ **How to Use It:**
886
+
887
+ - **Basic Daemon**: `agent-cli transcribe-daemon`
888
+ - **With Custom Role**: `agent-cli transcribe-daemon --role meeting`
889
+ - **With LLM Cleanup**: `agent-cli transcribe-daemon --llm`
890
+ - **Custom Silence Threshold**: `agent-cli transcribe-daemon --silence-threshold 1.5`
891
+
892
+ **Output Files:**
893
+
894
+ - **Transcription Log**: `~/.config/agent-cli/transcriptions.jsonl` (JSON Lines format)
895
+ - **Audio Files**: `~/.config/agent-cli/audio/YYYY/MM/DD/*.mp3`
896
+
897
+ <details>
898
+ <summary>See the output of <code>agent-cli transcribe-daemon --help</code></summary>
899
+
900
+ <!-- CODE:BASH:START -->
901
+ <!-- echo '```yaml' -->
902
+ <!-- export NO_COLOR=1 -->
903
+ <!-- export TERM=dumb -->
904
+ <!-- export COLUMNS=90 -->
905
+ <!-- export TERMINAL_WIDTH=90 -->
906
+ <!-- agent-cli transcribe-daemon --help -->
907
+ <!-- echo '```' -->
908
+ <!-- CODE:END -->
909
+ <!-- OUTPUT:START -->
910
+ <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
911
+ ```yaml
912
+
913
+ Usage: agent-cli transcribe-daemon [OPTIONS]
914
+
915
+ Run a continuous transcription daemon with voice activity detection.
916
+
917
+ This command runs indefinitely, capturing audio from your microphone, detecting speech
918
+ segments using Silero VAD, transcribing them, and logging results with timestamps.
919
+
920
+ Examples: # Basic daemon agent-cli transcribe-daemon
921
+
922
+
923
+ # With role and custom silence threshold
924
+ agent-cli transcribe-daemon --role meeting --silence-threshold 1.5
925
+
926
+ # With LLM cleanup
927
+ agent-cli transcribe-daemon --llm --role notes
928
+
929
+ # Custom log file and audio directory
930
+ agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir ~/audio
931
+
932
+
933
+ ╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
934
+ │ --role -r TEXT Role name for logging (e.g., │
935
+ │ 'meeting', 'notes', 'user'). │
936
+ │ [default: user] │
937
+ │ --silence-threshold -s FLOAT Seconds of silence to end a speech │
938
+ │ segment. │
939
+ │ [default: 1.0] │
940
+ │ --min-segment -m FLOAT Minimum speech duration in seconds │
941
+ │ to trigger a segment. │
942
+ │ [default: 0.25] │
943
+ │ --vad-threshold FLOAT VAD speech detection threshold │
944
+ │ (0.0-1.0). Higher = more aggressive │
945
+ │ filtering. │
946
+ │ [default: 0.3] │
947
+ │ --save-audio --no-save-audio Save audio segments as MP3 files. │
948
+ │ [default: save-audio] │
949
+ │ --audio-dir PATH Directory for MP3 files. Default: │
950
+ │ ~/.config/agent-cli/audio │
951
+ │ --transcription-log -t PATH JSON Lines log file path. Default: │
952
+ │ ~/.config/agent-cli/transcriptions… │
953
+ │ --clipboard --no-clipboard Copy each transcription to │
954
+ │ clipboard. │
955
+ │ [default: no-clipboard] │
956
+ │ --help -h Show this message and exit. │
957
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
958
+ ╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
959
+ │ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │
960
+ │ [env var: ASR_PROVIDER] │
961
+ │ [default: wyoming] │
962
+ │ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
963
+ │ [env var: LLM_PROVIDER] │
964
+ │ [default: ollama] │
965
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
966
+ ╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
967
+ │ --input-device-index INTEGER Index of the audio input device to use. │
968
+ │ --input-device-name TEXT Device name keywords for partial matching. │
969
+ │ --list-devices List available audio input and output devices and │
970
+ │ exit. │
971
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
972
+ ╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮
973
+ │ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
974
+ │ [env var: ASR_WYOMING_IP] │
975
+ │ [default: localhost] │
976
+ │ --asr-wyoming-port INTEGER Wyoming ASR server port. │
977
+ │ [env var: ASR_WYOMING_PORT] │
978
+ │ [default: 10300] │
979
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
980
+ ╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮
981
+ │ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │
982
+ │ [env var: ASR_OPENAI_MODEL] │
983
+ │ [default: whisper-1] │
984
+ │ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │
985
+ │ (e.g., for custom Whisper server: │
986
+ │ http://localhost:9898). │
987
+ │ [env var: ASR_OPENAI_BASE_URL] │
988
+ │ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │
989
+ │ [env var: ASR_OPENAI_PROMPT] │
990
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
991
+ ╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮
992
+ │ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │
993
+ │ [env var: ASR_GEMINI_MODEL] │
994
+ │ [default: gemini-3-flash-preview] │
995
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
996
+ ╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
997
+ │ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
998
+ │ [env var: LLM_OLLAMA_MODEL] │
999
+ │ [default: gemma3:4b] │
1000
+ │ --llm-ollama-host TEXT The Ollama server host. Default is │
1001
+ │ http://localhost:11434. │
1002
+ │ [env var: LLM_OLLAMA_HOST] │
1003
+ │ [default: http://localhost:11434] │
1004
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1005
+ ╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
1006
+ │ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
1007
+ │ [env var: LLM_OPENAI_MODEL] │
1008
+ │ [default: gpt-5-mini] │
1009
+ │ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
1010
+ │ OPENAI_API_KEY environment variable. │
1011
+ │ [env var: OPENAI_API_KEY] │
1012
+ │ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
1013
+ │ llama-server: http://localhost:8080/v1). │
1014
+ │ [env var: OPENAI_BASE_URL] │
1015
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1016
+ ╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
1017
+ │ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
1018
+ │ [env var: LLM_GEMINI_MODEL] │
1019
+ │ [default: gemini-3-flash-preview] │
1020
+ │ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
1021
+ │ GEMINI_API_KEY environment variable. │
1022
+ │ [env var: GEMINI_API_KEY] │
1023
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1024
+ ╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮
1025
+ │ --llm --no-llm Use an LLM to process the transcript. │
1026
+ │ [default: no-llm] │
1027
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1028
+ ╭─ Process Management ───────────────────────────────────────────────────────────────────╮
1029
+ │ --stop Stop any running background process. │
1030
+ │ --status Check if a background process is running. │
1031
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1032
+ ╭─ General Options ──────────────────────────────────────────────────────────────────────╮
1033
+ │ --log-level [debug|info|warning|error] Set logging level. │
1034
+ │ [env var: LOG_LEVEL] │
1035
+ │ [default: info] │
1036
+ │ --log-file TEXT Path to a file to write logs to. │
1037
+ │ --quiet -q Suppress console output from rich. │
1038
+ │ --config TEXT Path to a TOML configuration file. │
1039
+ │ --print-args Print the command line arguments, │
1040
+ │ including variables taken from the │
1041
+ │ configuration file. │
1042
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1043
+
1044
+ ```
1045
+
1046
+ <!-- OUTPUT:END -->
1047
+
1048
+ </details>
1049
+
1050
+ ### `speak`
1051
+
1052
+ **Purpose:** Reads any text out loud.
1053
+
1054
+ **Workflow:** A straightforward text-to-speech utility.
1055
+
1056
+ 1. It takes text from a command-line argument or your clipboard.
1057
+ 2. It sends the text to the configured TTS server (a Wyoming server such as Piper by default).
1058
+ 3. The generated audio is played through your default speakers.
1059
+
1060
+ **How to Use It:**
1061
+
1062
+ - **Speak from Argument**: `agent-cli speak "Hello, world!"`
1063
+ - **Speak from Clipboard**: `agent-cli speak`
1064
+ - **Save to File**: `agent-cli speak "Hello" --save-file hello.wav`
1065
+
1066
+ <details>
1067
+ <summary>See the output of <code>agent-cli speak --help</code></summary>
1068
+
1069
+ <!-- CODE:BASH:START -->
1070
+ <!-- echo '```yaml' -->
1071
+ <!-- export NO_COLOR=1 -->
1072
+ <!-- export TERM=dumb -->
1073
+ <!-- export COLUMNS=90 -->
1074
+ <!-- export TERMINAL_WIDTH=90 -->
1075
+ <!-- agent-cli speak --help -->
1076
+ <!-- echo '```' -->
1077
+ <!-- CODE:END -->
1078
+ <!-- OUTPUT:START -->
1079
+ <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
1080
+ ```yaml
1081
+
1082
+ Usage: agent-cli speak [OPTIONS] [TEXT]
1083
+
1084
+ Convert text to speech using Wyoming or OpenAI-compatible TTS server.
1085
+
1086
+ ╭─ General Options ──────────────────────────────────────────────────────────────────────╮
1087
+ │ text [TEXT] Text to speak. Reads from clipboard if not provided. │
1088
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1089
+ ╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
1090
+ │ --help -h Show this message and exit. │
1091
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1092
+ ╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
1093
+ │ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │
1094
+ │ 'gemini'). │
1095
+ │ [env var: TTS_PROVIDER] │
1096
+ │ [default: wyoming] │
1097
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1098
+ ╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮
1099
+ │ --output-device-index INTEGER Index of the audio output device to use for TTS. │
1100
+ │ --output-device-name TEXT Output device name keywords for partial │
1101
+ │ matching. │
1102
+ │ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, 2.0 = │
1103
+ │ twice as fast, 0.5 = half speed). │
1104
+ │ [default: 1.0] │
1105
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1106
+ ╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮
1107
+ │ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
1108
+ │ [default: localhost] │
1109
+ │ --tts-wyoming-port INTEGER Wyoming TTS server port. │
1110
+ │ [default: 10200] │
1111
+ │ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │
1112
+ │ 'en_US-lessac-medium'). │
1113
+ │ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │
1114
+ │ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
1115
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1116
+ ╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮
1117
+ │ --tts-openai-model TEXT The OpenAI model to use for TTS. │
1118
+ │ [default: tts-1] │
1119
+ │ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │
1120
+ │ [default: alloy] │
1121
+ │ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │
1122
+ │ (e.g., http://localhost:8000/v1 for a proxy). │
1123
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1124
+ ╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮
1125
+ │ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
1126
+ │ [default: kokoro] │
1127
+ │ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
1128
+ │ [default: af_sky] │
1129
+ │ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
1130
+ │ [default: http://localhost:8880/v1] │
1131
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1132
+ ╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮
1133
+ │ --tts-gemini-model TEXT The Gemini model to use for TTS. │
1134
+ │ [default: gemini-2.5-flash-preview-tts] │
1135
+ │ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │
1136
+ │ 'Charon', 'Fenrir'). │
1137
+ │ [default: Kore] │
1138
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1139
+ ╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
1140
+ │ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
1141
+ │ GEMINI_API_KEY environment variable. │
1142
+ │ [env var: GEMINI_API_KEY] │
1143
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1144
+ ╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
1145
+ │ --list-devices List available audio input and output devices and exit. │
1146
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1147
+ ╭─ General Options ──────────────────────────────────────────────────────────────────────╮
1148
+ │ --save-file PATH Save TTS response audio to WAV file. │
1149
+ │ --log-level [debug|info|warning|error] Set logging level. │
1150
+ │ [env var: LOG_LEVEL] │
1151
+ │ [default: info] │
1152
+ │ --log-file TEXT Path to a file to write logs to. │
1153
+ │ --quiet -q Suppress console output from rich. │
1154
+ │ --json Output result as JSON for │
1155
+ │ automation. Implies --quiet and │
1156
+ │ --no-clipboard. │
1157
+ │ --config TEXT Path to a TOML configuration file. │
1158
+ │ --print-args Print the command line arguments, │
1159
+ │ including variables taken from the │
1160
+ │ configuration file. │
1161
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1162
+ ╭─ Process Management ───────────────────────────────────────────────────────────────────╮
1163
+ │ --stop Stop any running background process. │
1164
+ │ --status Check if a background process is running. │
1165
+ │ --toggle Toggle the background process on/off. If the process is running, it │
1166
+ │ will be stopped. If the process is not running, it will be started. │
1167
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1168
+
1169
+ ```
1170
+
1171
+ <!-- OUTPUT:END -->
1172
+
1173
+ </details>
1174
+
1175
+ ### `voice-edit`
1176
+
1177
+ **Purpose:** A powerful clipboard assistant that you command with your voice.
1178
+
1179
+ **Workflow:** This agent is designed for a hotkey-driven workflow to act on text you've already copied.
1180
+
1181
+ 1. Copy a block of text to your clipboard (e.g., an email draft).
1182
+ 2. Press a hotkey to run `agent-cli voice-edit &` in the background. The agent is now listening.
1183
+ 3. Speak a command, such as "Make this more formal" or "Summarize the key points."
1184
+ 4. Press the same hotkey again, which should trigger `agent-cli voice-edit --stop`.
1185
+ 5. The agent transcribes your command, sends it along with the original clipboard text to the LLM, and the LLM performs the action.
1186
+ 6. The result is copied back to your clipboard. If `--tts` is enabled, it will also speak the result.
1187
+
1188
+ **How to Use It:** The power of this tool is unlocked with a hotkey manager like Keyboard Maestro (macOS) or AutoHotkey (Windows). See the docstring in `agent_cli/agents/voice_edit.py` for a detailed Keyboard Maestro setup guide.
1189
+
1190
+ <details>
1191
+ <summary>See the output of <code>agent-cli voice-edit --help</code></summary>
1192
+
1193
+ <!-- CODE:BASH:START -->
1194
+ <!-- echo '```yaml' -->
1195
+ <!-- export NO_COLOR=1 -->
1196
+ <!-- export TERM=dumb -->
1197
+ <!-- export COLUMNS=90 -->
1198
+ <!-- export TERMINAL_WIDTH=90 -->
1199
+ <!-- agent-cli voice-edit --help -->
1200
+ <!-- echo '```' -->
1201
+ <!-- CODE:END -->
1202
+ <!-- OUTPUT:START -->
1203
+ <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
1204
+ ```yaml
1205
+
1206
+ Usage: agent-cli voice-edit [OPTIONS]
1207
+
1208
+ Interact with clipboard text via a voice command using local or remote services.
1209
+
1210
+ Usage:
1211
+
1212
+ • Run in foreground: agent-cli voice-edit --input-device-index 1
1213
+ • Run in background: agent-cli voice-edit --input-device-index 1 &
1214
+ • Check status: agent-cli voice-edit --status
1215
+ • Stop background process: agent-cli voice-edit --stop
1216
+ • List output devices: agent-cli voice-edit --list-devices
1217
+ • Save TTS to file: agent-cli voice-edit --tts --save-file response.wav
1218
+
1219
+ ╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
1220
+ │ --help -h Show this message and exit. │
1221
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1222
+ ╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
1223
+ │ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │
1224
+ │ [env var: ASR_PROVIDER] │
1225
+ │ [default: wyoming] │
1226
+ │ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
1227
+ │ [env var: LLM_PROVIDER] │
1228
+ │ [default: ollama] │
1229
+ │ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │
1230
+ │ 'gemini'). │
1231
+ │ [env var: TTS_PROVIDER] │
1232
+ │ [default: wyoming] │
1233
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1234
+ ╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
1235
+ │ --input-device-index INTEGER Index of the audio input device to use. │
1236
+ │ --input-device-name TEXT Device name keywords for partial matching. │
1237
+ │ --list-devices List available audio input and output devices and │
1238
+ │ exit. │
1239
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1240
+ ╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮
1241
+ │ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
1242
+ │ [env var: ASR_WYOMING_IP] │
1243
+ │ [default: localhost] │
1244
+ │ --asr-wyoming-port INTEGER Wyoming ASR server port. │
1245
+ │ [env var: ASR_WYOMING_PORT] │
1246
+ │ [default: 10300] │
1247
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1248
+ ╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮
1249
+ │ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │
1250
+ │ [env var: ASR_OPENAI_MODEL] │
1251
+ │ [default: whisper-1] │
1252
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1253
+ ╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮
1254
+ │ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │
1255
+ │ [env var: ASR_GEMINI_MODEL] │
1256
+ │ [default: gemini-3-flash-preview] │
1257
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1258
+ ╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
1259
+ │ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
1260
+ │ [env var: LLM_OLLAMA_MODEL] │
1261
+ │ [default: gemma3:4b] │
1262
+ │ --llm-ollama-host TEXT The Ollama server host. Default is │
1263
+ │ http://localhost:11434. │
1264
+ │ [env var: LLM_OLLAMA_HOST] │
1265
+ │ [default: http://localhost:11434] │
1266
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1267
+ ╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
1268
+ │ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
1269
+ │ [env var: LLM_OPENAI_MODEL] │
1270
+ │ [default: gpt-5-mini] │
1271
+ │ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
1272
+ │ OPENAI_API_KEY environment variable. │
1273
+ │ [env var: OPENAI_API_KEY] │
1274
+ │ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
1275
+ │ llama-server: http://localhost:8080/v1). │
1276
+ │ [env var: OPENAI_BASE_URL] │
1277
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1278
+ ╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
1279
+ │ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
1280
+ │ [env var: LLM_GEMINI_MODEL] │
1281
+ │ [default: gemini-3-flash-preview] │
1282
+ │ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
1283
+ │ GEMINI_API_KEY environment variable. │
1284
+ │ [env var: GEMINI_API_KEY] │
1285
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1286
+ ╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮
1287
+ │ --tts --no-tts Enable text-to-speech for responses. │
1288
+ │ [default: no-tts] │
1289
+ │ --output-device-index INTEGER Index of the audio output device to use │
1290
+ │ for TTS. │
1291
+ │ --output-device-name TEXT Output device name keywords for partial │
1292
+ │ matching. │
1293
+ │ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │
1294
+ │ 2.0 = twice as fast, 0.5 = half speed). │
1295
+ │ [default: 1.0] │
1296
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1297
+ ╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮
1298
+ │ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
1299
+ │ [default: localhost] │
1300
+ │ --tts-wyoming-port INTEGER Wyoming TTS server port. │
1301
+ │ [default: 10200] │
1302
+ │ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │
1303
+ │ 'en_US-lessac-medium'). │
1304
+ │ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │
1305
+ │ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
1306
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1307
+ ╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮
1308
+ │ --tts-openai-model TEXT The OpenAI model to use for TTS. │
1309
+ │ [default: tts-1] │
1310
+ │ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │
1311
+ │ [default: alloy] │
1312
+ │ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │
1313
+ │ (e.g., http://localhost:8000/v1 for a proxy). │
1314
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1315
+ ╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮
1316
+ │ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
1317
+ │ [default: kokoro] │
1318
+ │ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
1319
+ │ [default: af_sky] │
1320
+ │ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
1321
+ │ [default: http://localhost:8880/v1] │
1322
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1323
+ ╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮
1324
+ │ --tts-gemini-model TEXT The Gemini model to use for TTS. │
1325
+ │ [default: gemini-2.5-flash-preview-tts] │
1326
+ │ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │
1327
+ │ 'Charon', 'Fenrir'). │
1328
+ │ [default: Kore] │
1329
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1330
+ ╭─ Process Management ───────────────────────────────────────────────────────────────────╮
1331
+ │ --stop Stop any running background process. │
1332
+ │ --status Check if a background process is running. │
1333
+ │ --toggle Toggle the background process on/off. If the process is running, it │
1334
+ │ will be stopped. If the process is not running, it will be started. │
1335
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1336
+ ╭─ General Options ──────────────────────────────────────────────────────────────────────╮
1337
+ │ --save-file PATH Save TTS response audio │
1338
+ │ to WAV file. │
1339
+ │ --clipboard --no-clipboard Copy result to │
1340
+ │ clipboard. │
1341
+ │ [default: clipboard] │
1342
+ │ --log-level [debug|info|warning|erro Set logging level. │
1343
+ │ r] [env var: LOG_LEVEL] │
1344
+ │ [default: info] │
1345
+ │ --log-file TEXT Path to a file to write │
1346
+ │ logs to. │
1347
+ │ --quiet -q Suppress console output │
1348
+ │ from rich. │
1349
+ │ --json Output result as JSON │
1350
+ │ for automation. Implies │
1351
+ │ --quiet and │
1352
+ │ --no-clipboard. │
1353
+ │ --config TEXT Path to a TOML │
1354
+ │ configuration file. │
1355
+ │ --print-args Print the command line │
1356
+ │ arguments, including │
1357
+ │ variables taken from the │
1358
+ │ configuration file. │
1359
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1360
+
1361
+ ```
1362
+
1363
+ <!-- OUTPUT:END -->
1364
+
1365
+ </details>
1366
+
1367
+ ### `assistant`
1368
+
1369
+ **Purpose:** A hands-free voice assistant that starts and stops recording based on a wake word.
1370
+
1371
+ **Workflow:** This agent continuously listens for a wake word (e.g., "Hey Nabu").
1372
+
1373
+ 1. Run the `assistant` command. It will start listening for the wake word.
1374
+ 2. Say the wake word to start recording.
1375
+ 3. Speak your command or question.
1376
+ 4. Say the wake word again to stop recording.
1377
+ 5. The agent transcribes your speech, sends it to the LLM, and gets a response.
1378
+ 6. The agent speaks the response back to you and then immediately starts listening for the wake word again.
1379
+
1380
+ **How to Use It:**
1381
+
1382
+ - **Start the agent**: `agent-cli assistant --wake-word "ok_nabu" --input-device-index 1`
1383
+ - **With TTS**: `agent-cli assistant --wake-word "ok_nabu" --tts --tts-wyoming-voice "en_US-lessac-medium"`
1384
+
1385
+ <details>
1386
+ <summary>See the output of <code>agent-cli assistant --help</code></summary>
1387
+
1388
+ <!-- CODE:BASH:START -->
1389
+ <!-- echo '```yaml' -->
1390
+ <!-- export NO_COLOR=1 -->
1391
+ <!-- export TERM=dumb -->
1392
+ <!-- export COLUMNS=90 -->
1393
+ <!-- export TERMINAL_WIDTH=90 -->
1394
+ <!-- agent-cli assistant --help -->
1395
+ <!-- echo '```' -->
1396
+ <!-- CODE:END -->
1397
+ <!-- OUTPUT:START -->
1398
+ <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
1399
+ ```yaml
1400
+
1401
+ Usage: agent-cli assistant [OPTIONS]
1402
+
1403
+ Wake word-based voice assistant using local or remote services.
1404
+
1405
+ ╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
1406
+ │ --help -h Show this message and exit. │
1407
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1408
+ ╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
1409
+ │ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │
1410
+ │ [env var: ASR_PROVIDER] │
1411
+ │ [default: wyoming] │
1412
+ │ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
1413
+ │ [env var: LLM_PROVIDER] │
1414
+ │ [default: ollama] │
1415
+ │ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │
1416
+ │ 'gemini'). │
1417
+ │ [env var: TTS_PROVIDER] │
1418
+ │ [default: wyoming] │
1419
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1420
+ ╭─ Wake Word ────────────────────────────────────────────────────────────────────────────╮
1421
+ │ --wake-server-ip TEXT Wyoming wake word server IP address. │
1422
+ │ [default: localhost] │
1423
+ │ --wake-server-port INTEGER Wyoming wake word server port. │
1424
+ │ [default: 10400] │
1425
+ │ --wake-word TEXT Name of wake word to detect (e.g., 'ok_nabu', │
1426
+ │ 'hey_jarvis'). │
1427
+ │ [default: ok_nabu] │
1428
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1429
+ ╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
1430
+ │ --input-device-index INTEGER Index of the audio input device to use. │
1431
+ │ --input-device-name TEXT Device name keywords for partial matching. │
1432
+ │ --list-devices List available audio input and output devices and │
1433
+ │ exit. │
1434
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1435
+ ╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮
1436
+ │ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
1437
+ │ [env var: ASR_WYOMING_IP] │
1438
+ │ [default: localhost] │
1439
+ │ --asr-wyoming-port INTEGER Wyoming ASR server port. │
1440
+ │ [env var: ASR_WYOMING_PORT] │
1441
+ │ [default: 10300] │
1442
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1443
+ ╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮
1444
+ │ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │
1445
+ │ [env var: ASR_OPENAI_MODEL] │
1446
+ │ [default: whisper-1] │
1447
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1448
+ ╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮
1449
+ │ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │
1450
+ │ [env var: ASR_GEMINI_MODEL] │
1451
+ │ [default: gemini-3-flash-preview] │
1452
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1453
+ ╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
1454
+ │ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
1455
+ │ [env var: LLM_OLLAMA_MODEL] │
1456
+ │ [default: gemma3:4b] │
1457
+ │ --llm-ollama-host TEXT The Ollama server host. Default is │
1458
+ │ http://localhost:11434. │
1459
+ │ [env var: LLM_OLLAMA_HOST] │
1460
+ │ [default: http://localhost:11434] │
1461
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1462
+ ╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
1463
+ │ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
1464
+ │ [env var: LLM_OPENAI_MODEL] │
1465
+ │ [default: gpt-5-mini] │
1466
+ │ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
1467
+ │ OPENAI_API_KEY environment variable. │
1468
+ │ [env var: OPENAI_API_KEY] │
1469
+ │ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
1470
+ │ llama-server: http://localhost:8080/v1). │
1471
+ │ [env var: OPENAI_BASE_URL] │
1472
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1473
+ ╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
1474
+ │ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
1475
+ │ [env var: LLM_GEMINI_MODEL] │
1476
+ │ [default: gemini-3-flash-preview] │
1477
+ │ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
1478
+ │ GEMINI_API_KEY environment variable. │
1479
+ │ [env var: GEMINI_API_KEY] │
1480
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1481
+ ╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮
1482
+ │ --tts --no-tts Enable text-to-speech for responses. │
1483
+ │ [default: no-tts] │
1484
+ │ --output-device-index INTEGER Index of the audio output device to use │
1485
+ │ for TTS. │
1486
+ │ --output-device-name TEXT Output device name keywords for partial │
1487
+ │ matching. │
1488
+ │ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │
1489
+ │ 2.0 = twice as fast, 0.5 = half speed). │
1490
+ │ [default: 1.0] │
1491
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1492
+ ╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮
1493
+ │ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
1494
+ │ [default: localhost] │
1495
+ │ --tts-wyoming-port INTEGER Wyoming TTS server port. │
1496
+ │ [default: 10200] │
1497
+ │ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │
1498
+ │ 'en_US-lessac-medium'). │
1499
+ │ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │
1500
+ │ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
1501
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1502
+ ╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮
1503
+ │ --tts-openai-model TEXT The OpenAI model to use for TTS. │
1504
+ │ [default: tts-1] │
1505
+ │ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │
1506
+ │ [default: alloy] │
1507
+ │ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │
1508
+ │ (e.g., http://localhost:8000/v1 for a proxy). │
1509
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1510
+ ╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮
1511
+ │ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
1512
+ │ [default: kokoro] │
1513
+ │ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
1514
+ │ [default: af_sky] │
1515
+ │ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
1516
+ │ [default: http://localhost:8880/v1] │
1517
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1518
+ ╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮
1519
+ │ --tts-gemini-model TEXT The Gemini model to use for TTS. │
1520
+ │ [default: gemini-2.5-flash-preview-tts] │
1521
+ │ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │
1522
+ │ 'Charon', 'Fenrir'). │
1523
+ │ [default: Kore] │
1524
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1525
+ ╭─ Process Management ───────────────────────────────────────────────────────────────────╮
1526
+ │ --stop Stop any running background process. │
1527
+ │ --status Check if a background process is running. │
1528
+ │ --toggle Toggle the background process on/off. If the process is running, it │
1529
+ │ will be stopped. If the process is not running, it will be started. │
1530
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1531
+ ╭─ General Options ──────────────────────────────────────────────────────────────────────╮
1532
+ │ --save-file PATH Save TTS response audio │
1533
+ │ to WAV file. │
1534
+ │ --clipboard --no-clipboard Copy result to │
1535
+ │ clipboard. │
1536
+ │ [default: clipboard] │
1537
+ │ --log-level [debug|info|warning|erro Set logging level. │
1538
+ │ r] [env var: LOG_LEVEL] │
1539
+ │ [default: info] │
1540
+ │ --log-file TEXT Path to a file to write │
1541
+ │ logs to. │
1542
+ │ --quiet -q Suppress console output │
1543
+ │ from rich. │
1544
+ │ --config TEXT Path to a TOML │
1545
+ │ configuration file. │
1546
+ │ --print-args Print the command line │
1547
+ │ arguments, including │
1548
+ │ variables taken from the │
1549
+ │ configuration file. │
1550
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1551
+
1552
+ ```
1553
+
1554
+ <!-- OUTPUT:END -->
1555
+
1556
+ </details>
1557
+
1558
+ ### `chat`
1559
+
1560
+ **Purpose:** A full-featured, conversational AI assistant that can interact with your system.
1561
+
1562
+ **Workflow:** This is a persistent, conversational agent that you can have a conversation with.
1563
+
1564
+ 1. Run the `chat` command. It will start listening for your voice.
1565
+ 2. Speak your command or question (e.g., "What's in my current directory?").
1566
+ 3. The agent transcribes your speech, sends it to the LLM, and gets a response. The LLM can use tools like `read_file` or `execute_code` to answer your question.
1567
+ 4. The agent speaks the response back to you and then immediately starts listening for your next command.
1568
+ 5. The conversation continues in this loop. Conversation history is saved between sessions.
1569
+
1570
+ **Interaction Model:**
1571
+
1572
+ - **To Interrupt**: Press `Ctrl+C` **once** to stop the agent from either listening or speaking, and it will immediately return to a listening state for a new command. This is useful if it misunderstands you or you want to speak again quickly.
1573
+ - **To Exit**: Press `Ctrl+C` **twice in a row** to terminate the application.
1574
+
1575
+ **How to Use It:**
1576
+
1577
+ - **Start the agent**: `agent-cli chat --input-device-index 1 --tts`
1578
+ - **Have a conversation**:
1579
+ - _You_: "Read the pyproject.toml file and tell me the project version."
1580
+ - _AI_: (Reads file) "The project version is 0.1.0."
1581
+ - _You_: "Thanks!"
1582
+
1583
+ <details>
1584
+ <summary>See the output of <code>agent-cli chat --help</code></summary>
1585
+
1586
+ <!-- CODE:BASH:START -->
1587
+ <!-- echo '```yaml' -->
1588
+ <!-- export NO_COLOR=1 -->
1589
+ <!-- export TERM=dumb -->
1590
+ <!-- export COLUMNS=90 -->
1591
+ <!-- export TERMINAL_WIDTH=90 -->
1592
+ <!-- agent-cli chat --help -->
1593
+ <!-- echo '```' -->
1594
+ <!-- CODE:END -->
1595
+ <!-- OUTPUT:START -->
1596
+ <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
1597
+ ```yaml
1598
+
1599
+ Usage: agent-cli chat [OPTIONS]
1600
+
1601
+ A chat agent that you can talk to.
1602
+
1603
+ ╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
1604
+ │ --help -h Show this message and exit. │
1605
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1606
+ ╭─ Provider Selection ───────────────────────────────────────────────────────────────────╮
1607
+ │ --asr-provider TEXT The ASR provider to use ('wyoming', 'openai', 'gemini'). │
1608
+ │ [env var: ASR_PROVIDER] │
1609
+ │ [default: wyoming] │
1610
+ │ --llm-provider TEXT The LLM provider to use ('ollama', 'openai', 'gemini'). │
1611
+ │ [env var: LLM_PROVIDER] │
1612
+ │ [default: ollama] │
1613
+ │ --tts-provider TEXT The TTS provider to use ('wyoming', 'openai', 'kokoro', │
1614
+ │ 'gemini'). │
1615
+ │ [env var: TTS_PROVIDER] │
1616
+ │ [default: wyoming] │
1617
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1618
+ ╭─ Audio Input ──────────────────────────────────────────────────────────────────────────╮
1619
+ │ --input-device-index INTEGER Index of the audio input device to use. │
1620
+ │ --input-device-name TEXT Device name keywords for partial matching. │
1621
+ │ --list-devices List available audio input and output devices and │
1622
+ │ exit. │
1623
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1624
+ ╭─ Audio Input: Wyoming ─────────────────────────────────────────────────────────────────╮
1625
+ │ --asr-wyoming-ip TEXT Wyoming ASR server IP address. │
1626
+ │ [env var: ASR_WYOMING_IP] │
1627
+ │ [default: localhost] │
1628
+ │ --asr-wyoming-port INTEGER Wyoming ASR server port. │
1629
+ │ [env var: ASR_WYOMING_PORT] │
1630
+ │ [default: 10300] │
1631
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1632
+ ╭─ Audio Input: OpenAI-compatible ───────────────────────────────────────────────────────╮
1633
+ │ --asr-openai-model TEXT The OpenAI model to use for ASR (transcription). │
1634
+ │ [env var: ASR_OPENAI_MODEL] │
1635
+ │ [default: whisper-1] │
1636
+ │ --asr-openai-base-url TEXT Custom base URL for OpenAI-compatible ASR API │
1637
+ │ (e.g., for custom Whisper server: │
1638
+ │ http://localhost:9898). │
1639
+ │ [env var: ASR_OPENAI_BASE_URL] │
1640
+ │ --asr-openai-prompt TEXT Custom prompt to guide transcription (optional). │
1641
+ │ [env var: ASR_OPENAI_PROMPT] │
1642
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1643
+ ╭─ Audio Input: Gemini ──────────────────────────────────────────────────────────────────╮
1644
+ │ --asr-gemini-model TEXT The Gemini model to use for ASR (transcription). │
1645
+ │ [env var: ASR_GEMINI_MODEL] │
1646
+ │ [default: gemini-3-flash-preview] │
1647
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1648
+ ╭─ LLM: Ollama ──────────────────────────────────────────────────────────────────────────╮
1649
+ │ --llm-ollama-model TEXT The Ollama model to use. Default is gemma3:4b. │
1650
+ │ [env var: LLM_OLLAMA_MODEL] │
1651
+ │ [default: gemma3:4b] │
1652
+ │ --llm-ollama-host TEXT The Ollama server host. Default is │
1653
+ │ http://localhost:11434. │
1654
+ │ [env var: LLM_OLLAMA_HOST] │
1655
+ │ [default: http://localhost:11434] │
1656
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1657
+ ╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
1658
+ │ --llm-openai-model TEXT The OpenAI model to use for LLM tasks. │
1659
+ │ [env var: LLM_OPENAI_MODEL] │
1660
+ │ [default: gpt-5-mini] │
1661
+ │ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
1662
+ │ OPENAI_API_KEY environment variable. │
1663
+ │ [env var: OPENAI_API_KEY] │
1664
+ │ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
1665
+ │ llama-server: http://localhost:8080/v1). │
1666
+ │ [env var: OPENAI_BASE_URL] │
1667
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1668
+ ╭─ LLM: Gemini ──────────────────────────────────────────────────────────────────────────╮
1669
+ │ --llm-gemini-model TEXT The Gemini model to use for LLM tasks. │
1670
+ │ [env var: LLM_GEMINI_MODEL] │
1671
+ │ [default: gemini-3-flash-preview] │
1672
+ │ --gemini-api-key TEXT Your Gemini API key. Can also be set with the │
1673
+ │ GEMINI_API_KEY environment variable. │
1674
+ │ [env var: GEMINI_API_KEY] │
1675
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1676
+ ╭─ Audio Output ─────────────────────────────────────────────────────────────────────────╮
1677
+ │ --tts --no-tts Enable text-to-speech for responses. │
1678
+ │ [default: no-tts] │
1679
+ │ --output-device-index INTEGER Index of the audio output device to use │
1680
+ │ for TTS. │
1681
+ │ --output-device-name TEXT Output device name keywords for partial │
1682
+ │ matching. │
1683
+ │ --tts-speed FLOAT Speech speed multiplier (1.0 = normal, │
1684
+ │ 2.0 = twice as fast, 0.5 = half speed). │
1685
+ │ [default: 1.0] │
1686
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1687
+ ╭─ Audio Output: Wyoming ────────────────────────────────────────────────────────────────╮
1688
+ │ --tts-wyoming-ip TEXT Wyoming TTS server IP address. │
1689
+ │ [default: localhost] │
1690
+ │ --tts-wyoming-port INTEGER Wyoming TTS server port. │
1691
+ │ [default: 10200] │
1692
+ │ --tts-wyoming-voice TEXT Voice name to use for Wyoming TTS (e.g., │
1693
+ │ 'en_US-lessac-medium'). │
1694
+ │ --tts-wyoming-language TEXT Language for Wyoming TTS (e.g., 'en_US'). │
1695
+ │ --tts-wyoming-speaker TEXT Speaker name for Wyoming TTS voice. │
1696
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1697
+ ╭─ Audio Output: OpenAI-compatible ──────────────────────────────────────────────────────╮
1698
+ │ --tts-openai-model TEXT The OpenAI model to use for TTS. │
1699
+ │ [default: tts-1] │
1700
+ │ --tts-openai-voice TEXT The voice to use for OpenAI-compatible TTS. │
1701
+ │ [default: alloy] │
1702
+ │ --tts-openai-base-url TEXT Custom base URL for OpenAI-compatible TTS API │
1703
+ │ (e.g., http://localhost:8000/v1 for a proxy). │
1704
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1705
+ ╭─ Audio Output: Kokoro ─────────────────────────────────────────────────────────────────╮
1706
+ │ --tts-kokoro-model TEXT The Kokoro model to use for TTS. │
1707
+ │ [default: kokoro] │
1708
+ │ --tts-kokoro-voice TEXT The voice to use for Kokoro TTS. │
1709
+ │ [default: af_sky] │
1710
+ │ --tts-kokoro-host TEXT The base URL for the Kokoro API. │
1711
+ │ [default: http://localhost:8880/v1] │
1712
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1713
+ ╭─ Audio Output: Gemini ─────────────────────────────────────────────────────────────────╮
1714
+ │ --tts-gemini-model TEXT The Gemini model to use for TTS. │
1715
+ │ [default: gemini-2.5-flash-preview-tts] │
1716
+ │ --tts-gemini-voice TEXT The voice to use for Gemini TTS (e.g., 'Kore', 'Puck', │
1717
+ │ 'Charon', 'Fenrir'). │
1718
+ │ [default: Kore] │
1719
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1720
+ ╭─ Process Management ───────────────────────────────────────────────────────────────────╮
1721
+ │ --stop Stop any running background process. │
1722
+ │ --status Check if a background process is running. │
1723
+ │ --toggle Toggle the background process on/off. If the process is running, it │
1724
+ │ will be stopped. If the process is not running, it will be started. │
1725
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1726
+ ╭─ History Options ──────────────────────────────────────────────────────────────────────╮
1727
+ │ --history-dir PATH Directory to store conversation history. │
1728
+ │ [default: ~/.config/agent-cli/history] │
1729
+ │ --last-n-messages INTEGER Number of messages to include in the conversation │
1730
+ │ history. Set to 0 to disable history. │
1731
+ │ [default: 50] │
1732
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1733
+ ╭─ General Options ──────────────────────────────────────────────────────────────────────╮
1734
+ │ --save-file PATH Save TTS response audio to WAV file. │
1735
+ │ --log-level [debug|info|warning|error] Set logging level. │
1736
+ │ [env var: LOG_LEVEL] │
1737
+ │ [default: info] │
1738
+ │ --log-file TEXT Path to a file to write logs to. │
1739
+ │ --quiet -q Suppress console output from rich. │
1740
+ │ --config TEXT Path to a TOML configuration file. │
1741
+ │ --print-args Print the command line arguments, │
1742
+ │ including variables taken from the │
1743
+ │ configuration file. │
1744
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1745
+
1746
+ ```
1747
+
1748
+ <!-- OUTPUT:END -->
1749
+
1750
+ </details>
1751
+
1752
+
1753
+ ### `rag-proxy`
1754
+
1755
+ **Purpose:** Enables "Chat with your Data" by running a local proxy server that injects document context into LLM requests.
1756
+
1757
+ **Workflow:**
1758
+
1759
+ 1. Start the server, pointing it to your documents folder and your local LLM (e.g., Ollama or llama.cpp) or OpenAI.
1760
+ 2. The server watches the folder and automatically indexes any text/markdown/PDF files into a local ChromaDB vector store.
1761
+ 3. Point any OpenAI-compatible client (including `agent-cli chat`) to this server's URL.
1762
+ 4. When you ask a question, the server retrieves relevant document chunks, adds them to the prompt, and forwards it to the LLM.
1763
+
1764
+ **How to Use It:**
1765
+
1766
+ - **Install RAG deps first**: `pip install "agent-cli[rag]"` (or, from the repo, `uv sync --extra rag`)
1767
+ - **Start Server (Local LLM)**: `agent-cli rag-proxy --docs-folder ~/Documents/Notes --openai-base-url http://localhost:11434/v1 --port 8000`
1768
+ - **Start Server (OpenAI)**: `agent-cli rag-proxy --docs-folder ~/Documents/Notes --openai-api-key sk-...`
1769
+ - **Use with Agent-CLI**: `agent-cli chat --openai-base-url http://localhost:8000/v1 --llm-provider openai`
1770
+
1771
+ <details>
1772
+ <summary>See the output of <code>agent-cli rag-proxy --help</code></summary>
1773
+
1774
+ <!-- CODE:BASH:START -->
1775
+ <!-- echo '```yaml' -->
1776
+ <!-- export NO_COLOR=1 -->
1777
+ <!-- export TERM=dumb -->
1778
+ <!-- export COLUMNS=90 -->
1779
+ <!-- export TERMINAL_WIDTH=90 -->
1780
+ <!-- agent-cli rag-proxy --help -->
1781
+ <!-- echo '```' -->
1782
+ <!-- CODE:END -->
1783
+ <!-- OUTPUT:START -->
1784
+ <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
1785
+ ```yaml
1786
+
1787
+ Usage: agent-cli rag-proxy [OPTIONS]
1788
+
1789
+ Start the RAG (Retrieval-Augmented Generation) Proxy Server.
1790
+
1791
+ This server watches a folder for documents, indexes them, and provides an
1792
+ OpenAI-compatible API that proxies requests to a backend LLM (like llama.cpp), injecting
1793
+ relevant context from the documents.
1794
+
1795
+ ╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
1796
+ │ --help -h Show this message and exit. │
1797
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1798
+ ╭─ RAG Configuration ────────────────────────────────────────────────────────────────────╮
1799
+ │ --docs-folder PATH Folder to watch for documents │
1800
+ │ [default: ./rag_docs] │
1801
+ │ --chroma-path PATH Path to ChromaDB persistence directory │
1802
+ │ [default: ./rag_db] │
1803
+ │ --limit INTEGER Number of document chunks to retrieve per │
1804
+ │ query. │
1805
+ │ [default: 3] │
1806
+ │ --rag-tools --no-rag-tools Allow agent to fetch full documents when │
1807
+ │ snippets are insufficient. │
1808
+ │ [default: rag-tools] │
1809
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1810
+ ╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
1811
+ │ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
1812
+ │ llama-server: http://localhost:8080/v1). │
1813
+ │ [env var: OPENAI_BASE_URL] │
1814
+ │ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
1815
+ │ OPENAI_API_KEY environment variable. │
1816
+ │ [env var: OPENAI_API_KEY] │
1817
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1818
+ ╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮
1819
+ │ --embedding-model TEXT Embedding model to use for vectorization. │
1820
+ │ [default: text-embedding-3-small] │
1821
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1822
+ ╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮
1823
+ │ --host TEXT Host/IP to bind API servers to. │
1824
+ │ [default: 0.0.0.0] │
1825
+ │ --port INTEGER Port to bind to │
1826
+ │ [default: 8000] │
1827
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1828
+ ╭─ General Options ──────────────────────────────────────────────────────────────────────╮
1829
+ │ --log-level [debug|info|warning|error] Set logging level. │
1830
+ │ [env var: LOG_LEVEL] │
1831
+ │ [default: info] │
1832
+ │ --config TEXT Path to a TOML configuration file. │
1833
+ │ --print-args Print the command line arguments, │
1834
+ │ including variables taken from the │
1835
+ │ configuration file. │
1836
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1837
+
1838
+ ```
1839
+
1840
+ <!-- OUTPUT:END -->
1841
+
1842
+ </details>
1843
+
1844
+ ### `memory`
1845
+
1846
+ The `memory proxy` command is the core feature—a middleware server that gives any OpenAI-compatible app long-term memory. Additional subcommands (`memory add`, etc.) help manage the memory store directly.
1847
+
1848
+ #### `memory proxy`
1849
+
1850
+ **Purpose:** Adds long-term conversational memory (self-hosted) to any OpenAI-compatible client.
1851
+
1852
+ **Key Features:**
1853
+
1854
+ - **Simple Markdown Files:** Your memories are stored as human-readable Markdown files, serving as the ultimate source of truth.
1855
+ - **Automatic Version Control:** Built-in Git integration automatically commits changes, giving you a full history of your memory's evolution.
1856
+ - **Lightweight & Local:** Minimal dependencies and runs entirely on your machine.
1857
+ - **Proxy Middleware:** Works transparently with any OpenAI-compatible `/chat/completions` endpoint (OpenAI, Ollama, vLLM).
1858
+
1859
+ **Workflow:**
1860
+
1861
+ - Stores a per-conversation memory collection in Chroma using the same embedding settings as `rag-proxy`; retrieved results are reranked with a cross-encoder.
1862
+ - For each turn, retrieves the top-k relevant memories (conversation + global) plus a rolling summary and augments the prompt.
1863
+ - After each reply, extracts salient facts and refreshes the running summary (disable with `--no-summarization`).
1864
+ - Enforces a per-conversation cap (`--max-entries`, default 500) and evicts oldest memories first.
1865
+
1866
+ **How to Use It:**
1867
+
1868
+ - **Install memory deps first**: `pip install "agent-cli[memory]"` (or, from the repo, `uv sync --extra memory`)
1869
+ - **Start Server (Local LLM/OpenAI-compatible)**: `agent-cli memory proxy --memory-path ./memory_db --openai-base-url http://localhost:11434/v1 --embedding-model embeddinggemma:300m`
1870
+ - **Use with Agent-CLI**: `agent-cli chat --openai-base-url http://localhost:8100/v1 --llm-provider openai`
1871
+
1872
+ <details>
1873
+ <summary>See the output of <code>agent-cli memory proxy --help</code></summary>
1874
+
1875
+ <!-- CODE:BASH:START -->
1876
+ <!-- echo '```yaml' -->
1877
+ <!-- export NO_COLOR=1 -->
1878
+ <!-- export TERM=dumb -->
1879
+ <!-- export COLUMNS=90 -->
1880
+ <!-- export TERMINAL_WIDTH=90 -->
1881
+ <!-- agent-cli memory proxy --help -->
1882
+ <!-- echo '```' -->
1883
+ <!-- CODE:END -->
1884
+ <!-- OUTPUT:START -->
1885
+ <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
1886
+ ```yaml
1887
+
1888
+ Usage: agent-cli memory proxy [OPTIONS]
1889
+
1890
+ Start the memory-backed chat proxy server.
1891
+
1892
+ This server acts as a middleware between your chat client (e.g., a web UI, CLI, or IDE
1893
+ plugin) and an OpenAI-compatible LLM provider (e.g., OpenAI, Ollama, vLLM).
1894
+
1895
+ Key Features:
1896
+
1897
+ • Simple Markdown Files: Memories are stored as human-readable Markdown files, serving
1898
+ as the ultimate source of truth.
1899
+ • Automatic Version Control: Built-in Git integration automatically commits changes,
1900
+ providing a full history of memory evolution.
1901
+ • Lightweight & Local: Minimal dependencies and runs entirely on your machine.
1902
+ • Proxy Middleware: Works transparently with any OpenAI-compatible /chat/completions
1903
+ endpoint.
1904
+
1905
+ How it works:
1906
+
1907
+ 1 Intercepts POST /v1/chat/completions requests.
1908
+ 2 Retrieves relevant memories (facts, previous conversations) from a local vector
1909
+ database (ChromaDB) based on the user's query.
1910
+ 3 Injects these memories into the system prompt.
1911
+ 4 Forwards the augmented request to the real LLM (--openai-base-url).
1912
+ 5 Extracts new facts from the conversation in the background and updates the long-term
1913
+ memory store (including handling contradictions).
1914
+
1915
+ Use this to give "long-term memory" to any OpenAI-compatible application. Point your
1916
+ client's base URL to http://localhost:8100/v1.
1917
+
1918
+ ╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
1919
+ │ --help -h Show this message and exit. │
1920
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1921
+ ╭─ Memory Configuration ─────────────────────────────────────────────────────────────────╮
1922
+ │ --memory-path PATH Path to the memory store (files + │
1923
+ │ derived vector index). │
1924
+ │ [default: ./memory_db] │
1925
+ │ --default-top-k INTEGER Number of memory entries to │
1926
+ │ retrieve per query. │
1927
+ │ [default: 5] │
1928
+ │ --max-entries INTEGER Maximum stored memory entries per │
1929
+ │ conversation (excluding summary). │
1930
+ │ [default: 500] │
1931
+ │ --mmr-lambda FLOAT MMR lambda (0-1): higher favors │
1932
+ │ relevance, lower favors │
1933
+ │ diversity. │
1934
+ │ [default: 0.7] │
1935
+ │ --recency-weight FLOAT Recency score weight (0.0-1.0). │
1936
+ │ Controls freshness vs. relevance. │
1937
+ │ Default 0.2 (20% recency, 80% │
1938
+ │ semantic relevance). │
1939
+ │ [default: 0.2] │
1940
+ │ --score-threshold FLOAT Minimum semantic relevance │
1941
+ │ threshold (0.0-1.0). Memories │
1942
+ │ below this score are discarded to │
1943
+ │ reduce noise. │
1944
+ │ [default: 0.35] │
1945
+ │ --summarization --no-summarization Enable automatic fact extraction │
1946
+ │ and summaries. │
1947
+ │ [default: summarization] │
1948
+ │ --git-versioning --no-git-versioning Enable automatic git commit of │
1949
+ │ memory changes. │
1950
+ │ [default: git-versioning] │
1951
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1952
+ ╭─ LLM: OpenAI-compatible ───────────────────────────────────────────────────────────────╮
1953
+ │ --openai-base-url TEXT Custom base URL for OpenAI-compatible API (e.g., for │
1954
+ │ llama-server: http://localhost:8080/v1). │
1955
+ │ [env var: OPENAI_BASE_URL] │
1956
+ │ --openai-api-key TEXT Your OpenAI API key. Can also be set with the │
1957
+ │ OPENAI_API_KEY environment variable. │
1958
+ │ [env var: OPENAI_API_KEY] │
1959
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1960
+ ╭─ LLM Configuration ────────────────────────────────────────────────────────────────────╮
1961
+ │ --embedding-model TEXT Embedding model to use for vectorization. │
1962
+ │ [default: text-embedding-3-small] │
1963
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1964
+ ╭─ Server Configuration ─────────────────────────────────────────────────────────────────╮
1965
+ │ --host TEXT Host/IP to bind API servers to. │
1966
+ │ [default: 0.0.0.0] │
1967
+ │ --port INTEGER Port to bind to │
1968
+ │ [default: 8100] │
1969
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1970
+ ╭─ General Options ──────────────────────────────────────────────────────────────────────╮
1971
+ │ --log-level [debug|info|warning|error] Set logging level. │
1972
+ │ [env var: LOG_LEVEL] │
1973
+ │ [default: info] │
1974
+ │ --config TEXT Path to a TOML configuration file. │
1975
+ │ --print-args Print the command line arguments, │
1976
+ │ including variables taken from the │
1977
+ │ configuration file. │
1978
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
1979
+
1980
+ ```
1981
+
1982
+ <!-- OUTPUT:END -->
1983
+
1984
+ </details>
1985
+
1986
+ #### `memory add`
1987
+
1988
+ **Purpose:** Directly add memories to the store without LLM extraction. Useful for bulk imports or seeding memories.
1989
+
1990
+ **How to Use It:**
1991
+
1992
+ ```bash
1993
+ # Add single memories as arguments
1994
+ agent-cli memory add "User likes coffee" "User lives in Amsterdam"
1995
+
1996
+ # Read from JSON file
1997
+ agent-cli memory add -f memories.json
1998
+
1999
+ # Read from stdin (plain text, one per line)
2000
+ echo "User prefers dark mode" | agent-cli memory add -f -
2001
+
2002
+ # Read JSON from stdin
2003
+ echo '["Fact one", "Fact two"]' | agent-cli memory add -f -
2004
+
2005
+ # Specify conversation ID
2006
+ agent-cli memory add -c work "Project deadline is Friday"
2007
+ ```
2008
+
2009
+ <details>
2010
+ <summary>See the output of <code>agent-cli memory add --help</code></summary>
2011
+
2012
+ <!-- CODE:BASH:START -->
2013
+ <!-- echo '```yaml' -->
2014
+ <!-- export NO_COLOR=1 -->
2015
+ <!-- export TERM=dumb -->
2016
+ <!-- export COLUMNS=90 -->
2017
+ <!-- export TERMINAL_WIDTH=90 -->
2018
+ <!-- agent-cli memory add --help -->
2019
+ <!-- echo '```' -->
2020
+ <!-- CODE:END -->
2021
+ <!-- OUTPUT:START -->
2022
+ <!-- ⚠️ This content is auto-generated by `markdown-code-runner`. -->
2023
+ ```yaml
2024
+
2025
+ Usage: agent-cli memory add [OPTIONS] [MEMORIES]...
2026
+
2027
+ Add memories directly without LLM extraction.
2028
+
2029
+ This writes facts directly to the memory store, bypassing the LLM-based fact extraction.
2030
+ Useful for bulk imports or seeding memories.
2031
+
2032
+ The memory proxy file watcher (if running) will auto-index new files. Otherwise, they'll
2033
+ be indexed on next memory proxy startup.
2034
+
2035
+ Examples::
2036
+
2037
+
2038
+ # Add single memories as arguments
2039
+ agent-cli memory add "User likes coffee" "User lives in Amsterdam"
2040
+
2041
+ # Read from JSON file
2042
+ agent-cli memory add -f memories.json
2043
+
2044
+ # Read from stdin (plain text, one per line)
2045
+ echo "User prefers dark mode" | agent-cli memory add -f -
2046
+
2047
+ # Read JSON from stdin
2048
+ echo '["Fact one", "Fact two"]' | agent-cli memory add -f -
2049
+
2050
+ # Specify conversation ID
2051
+ agent-cli memory add -c work "Project deadline is Friday"
2052
+
2053
+
2054
+ ╭─ Arguments ────────────────────────────────────────────────────────────────────────────╮
2055
+ │ memories [MEMORIES]... Memories to add. Each argument becomes one fact. │
2056
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
2057
+ ╭─ Options ──────────────────────────────────────────────────────────────────────────────╮
2058
+ │ --file -f PATH Read memories from file. Use '-' │
2059
+ │ for stdin. Supports JSON array, │
2060
+ │ JSON object with 'memories' key, │
2061
+ │ or plain text (one per line). │
2062
+ │ --conversation-id -c TEXT Conversation ID to add memories │
2063
+ │ to. │
2064
+ │ [default: default] │
2065
+ │ --memory-path PATH Path to the memory store. │
2066
+ │ [default: ./memory_db] │
2067
+ │ --git-versioning --no-git-versioning Commit changes to git. │
2068
+ │ [default: git-versioning] │
2069
+ │ --help -h Show this message and exit. │
2070
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
2071
+ ╭─ General Options ──────────────────────────────────────────────────────────────────────╮
2072
+ │ --quiet -q Suppress console output from rich. │
2073
+ │ --config TEXT Path to a TOML configuration file. │
2074
+ │ --print-args Print the command line arguments, including variables │
2075
+ │ taken from the configuration file. │
2076
+ ╰────────────────────────────────────────────────────────────────────────────────────────╯
2077
+
2078
+ ```
2079
+
2080
+ <!-- OUTPUT:END -->
2081
+
2082
+ </details>
2083
+
2084
+ ## Development
2085
+
2086
+ ### Running Tests
2087
+
2088
+ The project uses `pytest` for testing. To run tests using `uv`:
2089
+
2090
+ ```bash
2091
+ uv run pytest
2092
+ ```
2093
+
2094
+ ### Pre-commit Hooks
2095
+
2096
+ This project uses pre-commit hooks (ruff for linting and formatting, mypy for type checking) to maintain code quality. To set them up:
2097
+
2098
+ 1. Install pre-commit:
2099
+
2100
+ ```bash
2101
+ pip install pre-commit
2102
+ ```
2103
+
2104
+ 2. Install the hooks:
2105
+
2106
+ ```bash
2107
+ pre-commit install
2108
+ ```
2109
+
2110
+ Now, the hooks will run automatically before each commit.
2111
+
2112
+ ## Contributing
2113
+
2114
+ Contributions are welcome! If you find a bug or have a feature request, please open an issue. If you'd like to contribute code, please fork the repository and submit a pull request.
2115
+
2116
+ ## License
2117
+
2118
+ This project is licensed under the MIT License - see the `LICENSE` file for details.