agent-cli 0.70.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. agent_cli/__init__.py +5 -0
  2. agent_cli/__main__.py +6 -0
  3. agent_cli/_extras.json +14 -0
  4. agent_cli/_requirements/.gitkeep +0 -0
  5. agent_cli/_requirements/audio.txt +79 -0
  6. agent_cli/_requirements/faster-whisper.txt +215 -0
  7. agent_cli/_requirements/kokoro.txt +425 -0
  8. agent_cli/_requirements/llm.txt +183 -0
  9. agent_cli/_requirements/memory.txt +355 -0
  10. agent_cli/_requirements/mlx-whisper.txt +222 -0
  11. agent_cli/_requirements/piper.txt +176 -0
  12. agent_cli/_requirements/rag.txt +402 -0
  13. agent_cli/_requirements/server.txt +154 -0
  14. agent_cli/_requirements/speed.txt +77 -0
  15. agent_cli/_requirements/vad.txt +155 -0
  16. agent_cli/_requirements/wyoming.txt +71 -0
  17. agent_cli/_tools.py +368 -0
  18. agent_cli/agents/__init__.py +23 -0
  19. agent_cli/agents/_voice_agent_common.py +136 -0
  20. agent_cli/agents/assistant.py +383 -0
  21. agent_cli/agents/autocorrect.py +284 -0
  22. agent_cli/agents/chat.py +496 -0
  23. agent_cli/agents/memory/__init__.py +31 -0
  24. agent_cli/agents/memory/add.py +190 -0
  25. agent_cli/agents/memory/proxy.py +160 -0
  26. agent_cli/agents/rag_proxy.py +128 -0
  27. agent_cli/agents/speak.py +209 -0
  28. agent_cli/agents/transcribe.py +671 -0
  29. agent_cli/agents/transcribe_daemon.py +499 -0
  30. agent_cli/agents/voice_edit.py +291 -0
  31. agent_cli/api.py +22 -0
  32. agent_cli/cli.py +106 -0
  33. agent_cli/config.py +503 -0
  34. agent_cli/config_cmd.py +307 -0
  35. agent_cli/constants.py +27 -0
  36. agent_cli/core/__init__.py +1 -0
  37. agent_cli/core/audio.py +461 -0
  38. agent_cli/core/audio_format.py +299 -0
  39. agent_cli/core/chroma.py +88 -0
  40. agent_cli/core/deps.py +191 -0
  41. agent_cli/core/openai_proxy.py +139 -0
  42. agent_cli/core/process.py +195 -0
  43. agent_cli/core/reranker.py +120 -0
  44. agent_cli/core/sse.py +87 -0
  45. agent_cli/core/transcription_logger.py +70 -0
  46. agent_cli/core/utils.py +526 -0
  47. agent_cli/core/vad.py +175 -0
  48. agent_cli/core/watch.py +65 -0
  49. agent_cli/dev/__init__.py +14 -0
  50. agent_cli/dev/cli.py +1588 -0
  51. agent_cli/dev/coding_agents/__init__.py +19 -0
  52. agent_cli/dev/coding_agents/aider.py +24 -0
  53. agent_cli/dev/coding_agents/base.py +167 -0
  54. agent_cli/dev/coding_agents/claude.py +39 -0
  55. agent_cli/dev/coding_agents/codex.py +24 -0
  56. agent_cli/dev/coding_agents/continue_dev.py +15 -0
  57. agent_cli/dev/coding_agents/copilot.py +24 -0
  58. agent_cli/dev/coding_agents/cursor_agent.py +48 -0
  59. agent_cli/dev/coding_agents/gemini.py +28 -0
  60. agent_cli/dev/coding_agents/opencode.py +15 -0
  61. agent_cli/dev/coding_agents/registry.py +49 -0
  62. agent_cli/dev/editors/__init__.py +19 -0
  63. agent_cli/dev/editors/base.py +89 -0
  64. agent_cli/dev/editors/cursor.py +15 -0
  65. agent_cli/dev/editors/emacs.py +46 -0
  66. agent_cli/dev/editors/jetbrains.py +56 -0
  67. agent_cli/dev/editors/nano.py +31 -0
  68. agent_cli/dev/editors/neovim.py +33 -0
  69. agent_cli/dev/editors/registry.py +59 -0
  70. agent_cli/dev/editors/sublime.py +20 -0
  71. agent_cli/dev/editors/vim.py +42 -0
  72. agent_cli/dev/editors/vscode.py +15 -0
  73. agent_cli/dev/editors/zed.py +20 -0
  74. agent_cli/dev/project.py +568 -0
  75. agent_cli/dev/registry.py +52 -0
  76. agent_cli/dev/skill/SKILL.md +141 -0
  77. agent_cli/dev/skill/examples.md +571 -0
  78. agent_cli/dev/terminals/__init__.py +19 -0
  79. agent_cli/dev/terminals/apple_terminal.py +82 -0
  80. agent_cli/dev/terminals/base.py +56 -0
  81. agent_cli/dev/terminals/gnome.py +51 -0
  82. agent_cli/dev/terminals/iterm2.py +84 -0
  83. agent_cli/dev/terminals/kitty.py +77 -0
  84. agent_cli/dev/terminals/registry.py +48 -0
  85. agent_cli/dev/terminals/tmux.py +58 -0
  86. agent_cli/dev/terminals/warp.py +132 -0
  87. agent_cli/dev/terminals/zellij.py +78 -0
  88. agent_cli/dev/worktree.py +856 -0
  89. agent_cli/docs_gen.py +417 -0
  90. agent_cli/example-config.toml +185 -0
  91. agent_cli/install/__init__.py +5 -0
  92. agent_cli/install/common.py +89 -0
  93. agent_cli/install/extras.py +174 -0
  94. agent_cli/install/hotkeys.py +48 -0
  95. agent_cli/install/services.py +87 -0
  96. agent_cli/memory/__init__.py +7 -0
  97. agent_cli/memory/_files.py +250 -0
  98. agent_cli/memory/_filters.py +63 -0
  99. agent_cli/memory/_git.py +157 -0
  100. agent_cli/memory/_indexer.py +142 -0
  101. agent_cli/memory/_ingest.py +408 -0
  102. agent_cli/memory/_persistence.py +182 -0
  103. agent_cli/memory/_prompt.py +91 -0
  104. agent_cli/memory/_retrieval.py +294 -0
  105. agent_cli/memory/_store.py +169 -0
  106. agent_cli/memory/_streaming.py +44 -0
  107. agent_cli/memory/_tasks.py +48 -0
  108. agent_cli/memory/api.py +113 -0
  109. agent_cli/memory/client.py +272 -0
  110. agent_cli/memory/engine.py +361 -0
  111. agent_cli/memory/entities.py +43 -0
  112. agent_cli/memory/models.py +112 -0
  113. agent_cli/opts.py +433 -0
  114. agent_cli/py.typed +0 -0
  115. agent_cli/rag/__init__.py +3 -0
  116. agent_cli/rag/_indexer.py +67 -0
  117. agent_cli/rag/_indexing.py +226 -0
  118. agent_cli/rag/_prompt.py +30 -0
  119. agent_cli/rag/_retriever.py +156 -0
  120. agent_cli/rag/_store.py +48 -0
  121. agent_cli/rag/_utils.py +218 -0
  122. agent_cli/rag/api.py +175 -0
  123. agent_cli/rag/client.py +299 -0
  124. agent_cli/rag/engine.py +302 -0
  125. agent_cli/rag/models.py +55 -0
  126. agent_cli/scripts/.runtime/.gitkeep +0 -0
  127. agent_cli/scripts/__init__.py +1 -0
  128. agent_cli/scripts/check_plugin_skill_sync.py +50 -0
  129. agent_cli/scripts/linux-hotkeys/README.md +63 -0
  130. agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh +45 -0
  131. agent_cli/scripts/linux-hotkeys/toggle-transcription.sh +58 -0
  132. agent_cli/scripts/linux-hotkeys/toggle-voice-edit.sh +58 -0
  133. agent_cli/scripts/macos-hotkeys/README.md +45 -0
  134. agent_cli/scripts/macos-hotkeys/skhd-config-example +5 -0
  135. agent_cli/scripts/macos-hotkeys/toggle-autocorrect.sh +12 -0
  136. agent_cli/scripts/macos-hotkeys/toggle-transcription.sh +37 -0
  137. agent_cli/scripts/macos-hotkeys/toggle-voice-edit.sh +37 -0
  138. agent_cli/scripts/nvidia-asr-server/README.md +99 -0
  139. agent_cli/scripts/nvidia-asr-server/pyproject.toml +27 -0
  140. agent_cli/scripts/nvidia-asr-server/server.py +255 -0
  141. agent_cli/scripts/nvidia-asr-server/shell.nix +32 -0
  142. agent_cli/scripts/nvidia-asr-server/uv.lock +4654 -0
  143. agent_cli/scripts/run-openwakeword.sh +11 -0
  144. agent_cli/scripts/run-piper-windows.ps1 +30 -0
  145. agent_cli/scripts/run-piper.sh +24 -0
  146. agent_cli/scripts/run-whisper-linux.sh +40 -0
  147. agent_cli/scripts/run-whisper-macos.sh +6 -0
  148. agent_cli/scripts/run-whisper-windows.ps1 +51 -0
  149. agent_cli/scripts/run-whisper.sh +9 -0
  150. agent_cli/scripts/run_faster_whisper_server.py +136 -0
  151. agent_cli/scripts/setup-linux-hotkeys.sh +72 -0
  152. agent_cli/scripts/setup-linux.sh +108 -0
  153. agent_cli/scripts/setup-macos-hotkeys.sh +61 -0
  154. agent_cli/scripts/setup-macos.sh +76 -0
  155. agent_cli/scripts/setup-windows.ps1 +63 -0
  156. agent_cli/scripts/start-all-services-windows.ps1 +53 -0
  157. agent_cli/scripts/start-all-services.sh +178 -0
  158. agent_cli/scripts/sync_extras.py +138 -0
  159. agent_cli/server/__init__.py +3 -0
  160. agent_cli/server/cli.py +721 -0
  161. agent_cli/server/common.py +222 -0
  162. agent_cli/server/model_manager.py +288 -0
  163. agent_cli/server/model_registry.py +225 -0
  164. agent_cli/server/proxy/__init__.py +3 -0
  165. agent_cli/server/proxy/api.py +444 -0
  166. agent_cli/server/streaming.py +67 -0
  167. agent_cli/server/tts/__init__.py +3 -0
  168. agent_cli/server/tts/api.py +335 -0
  169. agent_cli/server/tts/backends/__init__.py +82 -0
  170. agent_cli/server/tts/backends/base.py +139 -0
  171. agent_cli/server/tts/backends/kokoro.py +403 -0
  172. agent_cli/server/tts/backends/piper.py +253 -0
  173. agent_cli/server/tts/model_manager.py +201 -0
  174. agent_cli/server/tts/model_registry.py +28 -0
  175. agent_cli/server/tts/wyoming_handler.py +249 -0
  176. agent_cli/server/whisper/__init__.py +3 -0
  177. agent_cli/server/whisper/api.py +413 -0
  178. agent_cli/server/whisper/backends/__init__.py +89 -0
  179. agent_cli/server/whisper/backends/base.py +97 -0
  180. agent_cli/server/whisper/backends/faster_whisper.py +225 -0
  181. agent_cli/server/whisper/backends/mlx.py +270 -0
  182. agent_cli/server/whisper/languages.py +116 -0
  183. agent_cli/server/whisper/model_manager.py +157 -0
  184. agent_cli/server/whisper/model_registry.py +28 -0
  185. agent_cli/server/whisper/wyoming_handler.py +203 -0
  186. agent_cli/services/__init__.py +343 -0
  187. agent_cli/services/_wyoming_utils.py +64 -0
  188. agent_cli/services/asr.py +506 -0
  189. agent_cli/services/llm.py +228 -0
  190. agent_cli/services/tts.py +450 -0
  191. agent_cli/services/wake_word.py +142 -0
  192. agent_cli-0.70.5.dist-info/METADATA +2118 -0
  193. agent_cli-0.70.5.dist-info/RECORD +196 -0
  194. agent_cli-0.70.5.dist-info/WHEEL +4 -0
  195. agent_cli-0.70.5.dist-info/entry_points.txt +4 -0
  196. agent_cli-0.70.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,499 @@
1
+ """Continuous transcription daemon with voice activity detection."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import json
7
+ import logging
8
+ import platform
9
+ import signal
10
+ from contextlib import suppress
11
+ from dataclasses import dataclass
12
+ from datetime import UTC, datetime
13
+ from pathlib import Path
14
+ from typing import TYPE_CHECKING
15
+
16
+ import typer
17
+
18
+ from agent_cli import config, constants, opts
19
+ from agent_cli.agents.transcribe import (
20
+ AGENT_INSTRUCTIONS,
21
+ INSTRUCTION,
22
+ SYSTEM_PROMPT,
23
+ )
24
+ from agent_cli.cli import app
25
+ from agent_cli.core import process
26
+ from agent_cli.core.audio import open_audio_stream, setup_devices, setup_input_stream
27
+ from agent_cli.core.audio_format import check_ffmpeg_available, save_audio_as_mp3
28
+ from agent_cli.core.deps import requires_extras
29
+ from agent_cli.core.utils import (
30
+ console,
31
+ print_command_line_args,
32
+ print_with_style,
33
+ setup_logging,
34
+ )
35
+ from agent_cli.services.asr import create_recorded_audio_transcriber
36
+ from agent_cli.services.llm import process_and_update_clipboard
37
+
38
+ if TYPE_CHECKING:
39
+ from agent_cli.core.vad import VoiceActivityDetector
40
+
41
# Logger used throughout this module. NOTE: getLogger() without a name
# returns the root logger, not a module-specific one.
LOGGER = logging.getLogger()

# Fallback directory for saved MP3 segments when --audio-dir is not given.
_DEFAULT_AUDIO_DIR = Path.home() / ".config" / "agent-cli" / "audio"
# Fallback JSON Lines transcription log when --transcription-log is not given.
_DEFAULT_LOG_FILE = Path.home() / ".config" / "agent-cli" / "transcriptions.jsonl"
# Segments shorter than this many seconds are dropped before transcription.
_MIN_SEGMENT_DURATION_SECONDS = 0.3
46
+
47
+
48
@dataclass
class DaemonConfig:
    """Bundle of all daemon configuration.

    Built once by the ``transcribe-daemon`` command and handed to the daemon
    loop, which passes it on to every per-segment processing task.
    """

    # Label written to the "role" field of each log entry and shown on the console.
    role: str
    # Voice activity detector; fed raw audio chunks and queried for segment durations.
    vad: VoiceActivityDetector
    # Input device passed to setup_input_stream(); may be None.
    input_device_index: int | None
    # Selected providers; asr_provider / llm_provider decide which configs below are used.
    provider: config.ProviderSelection
    # ASR settings, one per supported provider ("wyoming", "openai", "gemini").
    wyoming_asr: config.WyomingASR
    openai_asr: config.OpenAIASR
    gemini_asr: config.GeminiASR
    # LLM settings, one per supported provider ("ollama", "openai", "gemini").
    ollama: config.Ollama
    openai_llm: config.OpenAILLM
    gemini_llm: config.GeminiLLM
    # When true, each transcript is additionally passed through the LLM cleanup step.
    llm_enabled: bool
    # When true, each segment is also saved as an MP3 file under audio_dir.
    save_audio: bool
    audio_dir: Path
    # JSON Lines file that receives one entry per transcribed segment.
    log_file: Path
    # When true, console status output is suppressed.
    quiet: bool
    # When true, the processed text (or the raw transcript) is copied to the clipboard.
    clipboard: bool
68
+
69
+
70
def _generate_audio_path(audio_dir: Path, timestamp: datetime) -> Path:
    """Return the MP3 path for a segment captured at ``timestamp``.

    The file lives at ``<audio_dir>/YYYY/MM/DD/HHMMSS_mmm.mp3`` where ``mmm``
    is the millisecond part of the timestamp. The day directory is created
    (including parents) if it does not exist yet; the file itself is not.
    """
    day_folder = audio_dir / timestamp.strftime("%Y/%m/%d")
    day_folder.mkdir(parents=True, exist_ok=True)

    clock = timestamp.strftime("%H%M%S")
    millis = timestamp.microsecond // 1000
    return day_folder / (clock + f"_{millis:03d}.mp3")
76
+
77
+
78
+ def _log_segment(
79
+ log_file: Path,
80
+ *,
81
+ timestamp: datetime,
82
+ role: str,
83
+ raw_output: str,
84
+ processed_output: str | None,
85
+ audio_file: Path | None,
86
+ duration_seconds: float,
87
+ model_info: str | None = None,
88
+ ) -> None:
89
+ """Append a transcription segment to the log file."""
90
+ entry = {
91
+ "timestamp": timestamp.isoformat(),
92
+ "hostname": platform.node(),
93
+ "role": role,
94
+ "model": model_info,
95
+ "raw_output": raw_output,
96
+ "processed_output": processed_output,
97
+ "audio_file": str(audio_file) if audio_file else None,
98
+ "duration_seconds": round(duration_seconds, 2),
99
+ }
100
+ log_file.parent.mkdir(parents=True, exist_ok=True)
101
+ with log_file.open("a", encoding="utf-8") as f:
102
+ f.write(json.dumps(entry, ensure_ascii=False) + "\n")
103
+
104
+
105
async def _process_segment(  # noqa: PLR0912
    cfg: DaemonConfig,
    segment: bytes,
    timestamp: datetime,
) -> None:
    """Process a speech segment: transcribe, optionally LLM-clean, and log.

    Steps, in order: drop segments shorter than
    ``_MIN_SEGMENT_DURATION_SECONDS``; optionally save the audio as MP3;
    transcribe with the configured ASR provider; optionally clean the
    transcript with the LLM; optionally copy the result to the clipboard;
    append an entry to the transcription log.

    Args:
        cfg: Daemon configuration (providers, output locations, flags).
        segment: Audio bytes of one speech segment produced by ``cfg.vad``.
        timestamp: Time associated with the segment; used for the audio
            filename, the console line and the log entry.

    Raises:
        NotImplementedError: If ``cfg.provider.asr_provider`` is not one of
            ``"openai"``, ``"gemini"`` or ``"wyoming"``.

    """
    duration = cfg.vad.get_segment_duration_seconds(segment)
    if duration < _MIN_SEGMENT_DURATION_SECONDS:
        LOGGER.debug("Skipping very short segment: %.2fs", duration)
        return

    # Save audio as MP3 if requested (run in thread to avoid blocking event loop).
    # Saving is best-effort: a failure must not prevent transcription, and
    # `audio_path` is only set once the file was actually written so the log
    # never references a file that does not exist.
    audio_path: Path | None = None
    if cfg.save_audio:
        try:
            target_path = _generate_audio_path(cfg.audio_dir, timestamp)
            await asyncio.to_thread(save_audio_as_mp3, segment, target_path)
        except (OSError, RuntimeError):
            LOGGER.exception("Failed to save audio as MP3")
        else:
            audio_path = target_path
            LOGGER.debug("Saved audio to %s", audio_path)

    # Transcribe. The transcriber call is made differently per provider:
    # positionally with the provider config for OpenAI/Gemini, by keyword
    # for Wyoming.
    transcriber = create_recorded_audio_transcriber(cfg.provider)
    if cfg.provider.asr_provider == "openai":
        transcript = await transcriber(segment, cfg.openai_asr, LOGGER, quiet=cfg.quiet)
    elif cfg.provider.asr_provider == "gemini":
        transcript = await transcriber(segment, cfg.gemini_asr, LOGGER, quiet=cfg.quiet)
    elif cfg.provider.asr_provider == "wyoming":
        transcript = await transcriber(
            audio_data=segment,
            wyoming_asr_cfg=cfg.wyoming_asr,
            logger=LOGGER,
            quiet=cfg.quiet,
        )
    else:
        msg = f"Unsupported ASR provider: {cfg.provider.asr_provider}"
        raise NotImplementedError(msg)

    if not transcript or not transcript.strip():
        LOGGER.debug("Empty transcript, skipping")
        if not cfg.quiet:
            # Overwrite the in-place status line (trailing spaces clear leftovers).
            console.print("[green]👂 Listening...[/green]" + " " * 20, end="\r")
        return

    if not cfg.quiet:
        # Blank out the status line before printing the permanent transcript line.
        console.print(" " * 50, end="\r")
        console.print(
            f"[dim]{timestamp.strftime('%H:%M:%S')}[/dim] [cyan]{cfg.role}[/cyan]: {transcript}",
        )
        console.file.flush()

    # LLM cleanup if enabled
    processed: str | None = None
    model_info: str | None = None

    if cfg.llm_enabled:
        models = {
            "ollama": cfg.ollama.llm_ollama_model,
            "openai": cfg.openai_llm.llm_openai_model,
            "gemini": cfg.gemini_llm.llm_gemini_model,
        }
        model_info = f"{cfg.provider.llm_provider}:{models.get(cfg.provider.llm_provider, '')}"

        # clipboard=False / quiet=True: clipboard copying and console output
        # are handled by this function itself, below.
        processed = await process_and_update_clipboard(
            system_prompt=SYSTEM_PROMPT,
            agent_instructions=AGENT_INSTRUCTIONS,
            provider_cfg=cfg.provider,
            ollama_cfg=cfg.ollama,
            openai_cfg=cfg.openai_llm,
            gemini_cfg=cfg.gemini_llm,
            logger=LOGGER,
            original_text=transcript,
            instruction=INSTRUCTION,
            clipboard=False,
            quiet=True,
            live=None,
            context=None,
        )

        if not cfg.quiet and processed and processed != transcript:
            console.print(f" [dim]→[/dim] [green]{processed}[/green]")

    # Copy to clipboard if enabled
    if cfg.clipboard:
        import pyperclip  # noqa: PLC0415

        text_to_copy = processed if processed else transcript
        pyperclip.copy(text_to_copy)

    # Log. The "model" field holds the LLM model when LLM cleanup ran,
    # otherwise the ASR provider (plus its model name for OpenAI).
    asr_model: str = cfg.provider.asr_provider
    if cfg.provider.asr_provider == "openai":
        asr_model += f":{cfg.openai_asr.asr_openai_model}"

    _log_segment(
        cfg.log_file,
        timestamp=timestamp,
        role=cfg.role,
        raw_output=transcript,
        processed_output=processed,
        audio_file=audio_path,
        duration_seconds=duration,
        model_info=model_info or asr_model,
    )

    if not cfg.quiet:
        console.print("[green]👂 Listening...[/green]" + " " * 20, end="\r")
212
+
213
+
214
async def _daemon_loop(cfg: DaemonConfig) -> None:  # noqa: PLR0912, PLR0915
    """Main daemon loop: continuously capture audio and process speech segments.

    Audio chunks are read from the input stream in a worker thread and fed
    to ``cfg.vad``. Each completed speech segment is handed to
    ``_process_segment`` in a background task so capture continues while it
    is processed. The loop ends when SIGINT/SIGTERM sets the shutdown event
    or when the task is cancelled; still-running segment tasks are then
    cancelled and given up to two seconds to finish.

    Args:
        cfg: Daemon configuration (audio device, VAD, providers, flags).

    """
    stream_config = setup_input_stream(cfg.input_device_index)
    # Strong references to in-flight tasks so they are not garbage collected.
    background_tasks: set[asyncio.Task[None]] = set()

    if not cfg.quiet:
        print_with_style("🎙️ Transcribe daemon started. Listening...", style="green")
        print_with_style(f" Role: {cfg.role}", style="dim")
        print_with_style(f" Log file: {cfg.log_file}", style="dim")
        if cfg.save_audio:
            print_with_style(f" Audio dir: {cfg.audio_dir}", style="dim")
        print_with_style(" Press Ctrl+C to stop.", style="dim")
        console.print()

    was_speaking = False
    shutdown_event = asyncio.Event()

    loop = asyncio.get_running_loop()
    # `add_signal_handler` is only available on Unix event loops; elsewhere
    # (e.g. Windows) it raises NotImplementedError. In that case Ctrl+C is
    # still handled through the KeyboardInterrupt/CancelledError paths below.
    installed_signals: list[signal.Signals] = []
    for sig in (signal.SIGINT, signal.SIGTERM):
        try:
            loop.add_signal_handler(sig, shutdown_event.set)
        except NotImplementedError:
            LOGGER.debug("Signal handlers are not supported by this event loop")
            break
        installed_signals.append(sig)

    with open_audio_stream(stream_config) as stream:
        try:
            while not shutdown_event.is_set():
                try:
                    # Blocking read is moved off the event loop thread.
                    data, _ = await asyncio.to_thread(stream.read, constants.AUDIO_CHUNK_SIZE)
                    chunk = data.tobytes()
                except asyncio.CancelledError:
                    break
                except Exception:
                    LOGGER.exception("Error reading audio stream")
                    # Brief back-off so a persistently failing stream does not spin.
                    await asyncio.sleep(0.1)
                    continue

                is_speaking, segment = cfg.vad.process_chunk(chunk)

                if not cfg.quiet:
                    # Only print on state transitions; "\r" keeps it on one line.
                    if is_speaking and not was_speaking:
                        console.print("[red]🔴 Recording...[/red]", end="\r")
                    elif not is_speaking and was_speaking and segment is None:
                        console.print("[yellow]⏸️ Pause detected...[/yellow]", end="\r")

                was_speaking = is_speaking

                if segment:
                    timestamp = datetime.now(UTC).astimezone()
                    duration = cfg.vad.get_segment_duration_seconds(segment)

                    if not cfg.quiet:
                        console.print(
                            f"[blue]⏳ Processing {duration:.1f}s segment...[/blue]",
                            end="\r",
                        )

                    LOGGER.debug("Speech segment detected, %.2f seconds", duration)

                    task = asyncio.create_task(_process_segment(cfg, segment, timestamp))
                    background_tasks.add(task)
                    task.add_done_callback(background_tasks.discard)

        except (KeyboardInterrupt, asyncio.CancelledError):
            LOGGER.debug("Shutdown signal received")
        finally:
            # Only remove handlers that were actually installed above.
            for sig in installed_signals:
                with suppress(ValueError):
                    loop.remove_signal_handler(sig)
            with suppress(Exception):
                stream.abort()
            for task in background_tasks:
                if not task.done():
                    task.cancel()
            if background_tasks:
                # asyncio.wait() returns on timeout instead of raising, so no
                # TimeoutError handling is needed here.
                await asyncio.wait(background_tasks, timeout=2.0)
288
+
289
+
290
@app.command("transcribe-daemon", rich_help_panel="Voice Commands")
@requires_extras("audio", "vad", "llm")
def transcribe_daemon(  # noqa: PLR0912
    *,
    # Daemon-specific options
    role: str = typer.Option(
        "user",
        "--role",
        "-r",
        help="Role name for logging (e.g., 'meeting', 'notes', 'user').",
    ),
    silence_threshold: float = typer.Option(
        1.0,
        "--silence-threshold",
        "-s",
        help="Seconds of silence to end a speech segment.",
    ),
    min_segment: float = typer.Option(
        0.25,
        "--min-segment",
        "-m",
        help="Minimum speech duration in seconds to trigger a segment.",
    ),
    vad_threshold: float = typer.Option(
        0.3,
        "--vad-threshold",
        help="VAD speech detection threshold (0.0-1.0). Higher = more aggressive filtering.",
    ),
    save_audio: bool = typer.Option(
        True,  # noqa: FBT003
        "--save-audio/--no-save-audio",
        help="Save audio segments as MP3 files.",
    ),
    audio_dir: Path | None = typer.Option(  # noqa: B008
        None,
        "--audio-dir",
        help="Directory for MP3 files. Default: ~/.config/agent-cli/audio",
    ),
    transcription_log: Path | None = typer.Option(  # noqa: B008
        None,
        "--transcription-log",
        "-t",
        help="JSON Lines log file path. Default: ~/.config/agent-cli/transcriptions.jsonl",
    ),
    clipboard: bool = typer.Option(
        False,  # noqa: FBT003
        "--clipboard/--no-clipboard",
        help="Copy each transcription to clipboard.",
    ),
    # --- Provider Selection ---
    asr_provider: str = opts.ASR_PROVIDER,
    llm_provider: str = opts.LLM_PROVIDER,
    # --- ASR (Audio) Configuration ---
    input_device_index: int | None = opts.INPUT_DEVICE_INDEX,
    input_device_name: str | None = opts.INPUT_DEVICE_NAME,
    asr_wyoming_ip: str = opts.ASR_WYOMING_IP,
    asr_wyoming_port: int = opts.ASR_WYOMING_PORT,
    asr_openai_model: str = opts.ASR_OPENAI_MODEL,
    asr_openai_base_url: str | None = opts.ASR_OPENAI_BASE_URL,
    asr_openai_prompt: str | None = opts.ASR_OPENAI_PROMPT,
    asr_gemini_model: str = opts.ASR_GEMINI_MODEL,
    # --- LLM Configuration ---
    llm_ollama_model: str = opts.LLM_OLLAMA_MODEL,
    llm_ollama_host: str = opts.LLM_OLLAMA_HOST,
    llm_openai_model: str = opts.LLM_OPENAI_MODEL,
    openai_api_key: str | None = opts.OPENAI_API_KEY,
    openai_base_url: str | None = opts.OPENAI_BASE_URL,
    llm_gemini_model: str = opts.LLM_GEMINI_MODEL,
    gemini_api_key: str | None = opts.GEMINI_API_KEY,
    llm: bool = opts.LLM,
    # --- Process Management ---
    stop: bool = opts.STOP,
    status: bool = opts.STATUS,
    # --- General Options ---
    log_level: opts.LogLevel = opts.LOG_LEVEL,
    log_file_logging: str | None = opts.LOG_FILE,
    list_devices: bool = opts.LIST_DEVICES,
    quiet: bool = opts.QUIET,
    config_file: str | None = opts.CONFIG_FILE,
    print_args: bool = opts.PRINT_ARGS,
) -> None:
    """Run a continuous transcription daemon with voice activity detection.

    This command runs indefinitely, capturing audio from your microphone,
    detecting speech segments using Silero VAD, transcribing them, and
    logging results with timestamps.

    Examples:
        # Basic daemon
        agent-cli transcribe-daemon

        # With role and custom silence threshold
        agent-cli transcribe-daemon --role meeting --silence-threshold 1.5

        # With LLM cleanup
        agent-cli transcribe-daemon --llm --role notes

        # Custom log file and audio directory
        agent-cli transcribe-daemon --transcription-log ~/meeting.jsonl --audio-dir ~/audio

    """
    # NOTE(review): the docstring above is presumably rendered as this
    # command's --help text, so treat its wording as user-facing.
    #
    # Keep this as the first statement: locals() is passed as-is, and at this
    # point it contains only the command's parameters.
    if print_args:
        print_command_line_args(locals())
    setup_logging(log_level, log_file_logging, quiet=quiet)

    # Name shared by --stop, --status and the PID file context below.
    process_name = "transcribe-daemon"

    # Handle stop/status commands (both return without starting the daemon)
    if stop:
        if process.kill_process(process_name):
            if not quiet:
                print_with_style(f"✅ Stopped {process_name}", style="green")
        elif not quiet:
            print_with_style(f"⚠️ {process_name} is not running", style="yellow")
        return

    if status:
        if process.is_process_running(process_name):
            if not quiet:
                print_with_style(f"✅ {process_name} is running", style="green")
        elif not quiet:
            print_with_style(f"⚠️ {process_name} is not running", style="yellow")
        return

    # Validate VAD threshold
    if vad_threshold < 0.0 or vad_threshold > 1.0:
        print_with_style("❌ VAD threshold must be 0.0-1.0", style="red")
        raise typer.Exit(1)

    # Check FFmpeg availability if saving audio; without it the daemon still
    # runs, only MP3 saving is turned off.
    if save_audio and not check_ffmpeg_available():
        print_with_style(
            "⚠️ FFmpeg not found. Audio saving disabled. Install FFmpeg for MP3 support.",
            style="yellow",
        )
        save_audio = False

    # Setup audio device
    # clipboard is fixed to False here; the --clipboard flag is carried
    # separately in DaemonConfig.clipboard below.
    general_cfg = config.General(
        log_level=log_level,
        log_file=log_file_logging,
        quiet=quiet,
        list_devices=list_devices,
        clipboard=False,
    )
    audio_in_cfg = config.AudioInput(
        input_device_index=input_device_index,
        input_device_name=input_device_name,
    )
    device_info = setup_devices(general_cfg, audio_in_cfg, None)
    # NOTE(review): None presumably means setup_devices already handled the
    # request itself (e.g. --list-devices) — confirm against its definition.
    if device_info is None:
        return
    resolved_input_device_index, _, _ = device_info

    # Import VAD here to avoid loading torch/numpy at module import time
    from agent_cli.core.vad import VoiceActivityDetector  # noqa: PLC0415

    # Create daemon config
    cfg = DaemonConfig(
        role=role,
        vad=VoiceActivityDetector(
            threshold=vad_threshold,
            # CLI values are in seconds; the detector takes whole milliseconds.
            silence_threshold_ms=int(silence_threshold * 1000),
            min_speech_duration_ms=int(min_segment * 1000),
        ),
        input_device_index=resolved_input_device_index,
        provider=config.ProviderSelection(
            asr_provider=asr_provider,
            llm_provider=llm_provider,
            # No TTS option is exposed by this command; a fixed value is supplied.
            tts_provider="wyoming",
        ),
        wyoming_asr=config.WyomingASR(
            asr_wyoming_ip=asr_wyoming_ip,
            asr_wyoming_port=asr_wyoming_port,
        ),
        openai_asr=config.OpenAIASR(
            asr_openai_model=asr_openai_model,
            openai_api_key=openai_api_key,
            # The ASR-specific base URL wins; otherwise fall back to the shared one.
            openai_base_url=asr_openai_base_url or openai_base_url,
            asr_openai_prompt=asr_openai_prompt,
        ),
        gemini_asr=config.GeminiASR(
            asr_gemini_model=asr_gemini_model,
            gemini_api_key=gemini_api_key,
        ),
        ollama=config.Ollama(llm_ollama_model=llm_ollama_model, llm_ollama_host=llm_ollama_host),
        openai_llm=config.OpenAILLM(
            llm_openai_model=llm_openai_model,
            openai_api_key=openai_api_key,
            openai_base_url=openai_base_url,
        ),
        gemini_llm=config.GeminiLLM(
            llm_gemini_model=llm_gemini_model,
            gemini_api_key=gemini_api_key,
        ),
        llm_enabled=llm,
        save_audio=save_audio,
        audio_dir=audio_dir.expanduser() if audio_dir else _DEFAULT_AUDIO_DIR,
        log_file=transcription_log.expanduser() if transcription_log else _DEFAULT_LOG_FILE,
        quiet=quiet,
        clipboard=clipboard,
    )

    # Run the daemon. A KeyboardInterrupt escaping asyncio.run() is treated as
    # a normal stop rather than an error.
    with process.pid_file_context(process_name), suppress(KeyboardInterrupt):
        asyncio.run(_daemon_loop(cfg))

    if not quiet:
        console.print()
        print_with_style("👋 Transcribe daemon stopped.", style="yellow")