agent-cli 0.70.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. agent_cli/__init__.py +5 -0
  2. agent_cli/__main__.py +6 -0
  3. agent_cli/_extras.json +14 -0
  4. agent_cli/_requirements/.gitkeep +0 -0
  5. agent_cli/_requirements/audio.txt +79 -0
  6. agent_cli/_requirements/faster-whisper.txt +215 -0
  7. agent_cli/_requirements/kokoro.txt +425 -0
  8. agent_cli/_requirements/llm.txt +183 -0
  9. agent_cli/_requirements/memory.txt +355 -0
  10. agent_cli/_requirements/mlx-whisper.txt +222 -0
  11. agent_cli/_requirements/piper.txt +176 -0
  12. agent_cli/_requirements/rag.txt +402 -0
  13. agent_cli/_requirements/server.txt +154 -0
  14. agent_cli/_requirements/speed.txt +77 -0
  15. agent_cli/_requirements/vad.txt +155 -0
  16. agent_cli/_requirements/wyoming.txt +71 -0
  17. agent_cli/_tools.py +368 -0
  18. agent_cli/agents/__init__.py +23 -0
  19. agent_cli/agents/_voice_agent_common.py +136 -0
  20. agent_cli/agents/assistant.py +383 -0
  21. agent_cli/agents/autocorrect.py +284 -0
  22. agent_cli/agents/chat.py +496 -0
  23. agent_cli/agents/memory/__init__.py +31 -0
  24. agent_cli/agents/memory/add.py +190 -0
  25. agent_cli/agents/memory/proxy.py +160 -0
  26. agent_cli/agents/rag_proxy.py +128 -0
  27. agent_cli/agents/speak.py +209 -0
  28. agent_cli/agents/transcribe.py +671 -0
  29. agent_cli/agents/transcribe_daemon.py +499 -0
  30. agent_cli/agents/voice_edit.py +291 -0
  31. agent_cli/api.py +22 -0
  32. agent_cli/cli.py +106 -0
  33. agent_cli/config.py +503 -0
  34. agent_cli/config_cmd.py +307 -0
  35. agent_cli/constants.py +27 -0
  36. agent_cli/core/__init__.py +1 -0
  37. agent_cli/core/audio.py +461 -0
  38. agent_cli/core/audio_format.py +299 -0
  39. agent_cli/core/chroma.py +88 -0
  40. agent_cli/core/deps.py +191 -0
  41. agent_cli/core/openai_proxy.py +139 -0
  42. agent_cli/core/process.py +195 -0
  43. agent_cli/core/reranker.py +120 -0
  44. agent_cli/core/sse.py +87 -0
  45. agent_cli/core/transcription_logger.py +70 -0
  46. agent_cli/core/utils.py +526 -0
  47. agent_cli/core/vad.py +175 -0
  48. agent_cli/core/watch.py +65 -0
  49. agent_cli/dev/__init__.py +14 -0
  50. agent_cli/dev/cli.py +1588 -0
  51. agent_cli/dev/coding_agents/__init__.py +19 -0
  52. agent_cli/dev/coding_agents/aider.py +24 -0
  53. agent_cli/dev/coding_agents/base.py +167 -0
  54. agent_cli/dev/coding_agents/claude.py +39 -0
  55. agent_cli/dev/coding_agents/codex.py +24 -0
  56. agent_cli/dev/coding_agents/continue_dev.py +15 -0
  57. agent_cli/dev/coding_agents/copilot.py +24 -0
  58. agent_cli/dev/coding_agents/cursor_agent.py +48 -0
  59. agent_cli/dev/coding_agents/gemini.py +28 -0
  60. agent_cli/dev/coding_agents/opencode.py +15 -0
  61. agent_cli/dev/coding_agents/registry.py +49 -0
  62. agent_cli/dev/editors/__init__.py +19 -0
  63. agent_cli/dev/editors/base.py +89 -0
  64. agent_cli/dev/editors/cursor.py +15 -0
  65. agent_cli/dev/editors/emacs.py +46 -0
  66. agent_cli/dev/editors/jetbrains.py +56 -0
  67. agent_cli/dev/editors/nano.py +31 -0
  68. agent_cli/dev/editors/neovim.py +33 -0
  69. agent_cli/dev/editors/registry.py +59 -0
  70. agent_cli/dev/editors/sublime.py +20 -0
  71. agent_cli/dev/editors/vim.py +42 -0
  72. agent_cli/dev/editors/vscode.py +15 -0
  73. agent_cli/dev/editors/zed.py +20 -0
  74. agent_cli/dev/project.py +568 -0
  75. agent_cli/dev/registry.py +52 -0
  76. agent_cli/dev/skill/SKILL.md +141 -0
  77. agent_cli/dev/skill/examples.md +571 -0
  78. agent_cli/dev/terminals/__init__.py +19 -0
  79. agent_cli/dev/terminals/apple_terminal.py +82 -0
  80. agent_cli/dev/terminals/base.py +56 -0
  81. agent_cli/dev/terminals/gnome.py +51 -0
  82. agent_cli/dev/terminals/iterm2.py +84 -0
  83. agent_cli/dev/terminals/kitty.py +77 -0
  84. agent_cli/dev/terminals/registry.py +48 -0
  85. agent_cli/dev/terminals/tmux.py +58 -0
  86. agent_cli/dev/terminals/warp.py +132 -0
  87. agent_cli/dev/terminals/zellij.py +78 -0
  88. agent_cli/dev/worktree.py +856 -0
  89. agent_cli/docs_gen.py +417 -0
  90. agent_cli/example-config.toml +185 -0
  91. agent_cli/install/__init__.py +5 -0
  92. agent_cli/install/common.py +89 -0
  93. agent_cli/install/extras.py +174 -0
  94. agent_cli/install/hotkeys.py +48 -0
  95. agent_cli/install/services.py +87 -0
  96. agent_cli/memory/__init__.py +7 -0
  97. agent_cli/memory/_files.py +250 -0
  98. agent_cli/memory/_filters.py +63 -0
  99. agent_cli/memory/_git.py +157 -0
  100. agent_cli/memory/_indexer.py +142 -0
  101. agent_cli/memory/_ingest.py +408 -0
  102. agent_cli/memory/_persistence.py +182 -0
  103. agent_cli/memory/_prompt.py +91 -0
  104. agent_cli/memory/_retrieval.py +294 -0
  105. agent_cli/memory/_store.py +169 -0
  106. agent_cli/memory/_streaming.py +44 -0
  107. agent_cli/memory/_tasks.py +48 -0
  108. agent_cli/memory/api.py +113 -0
  109. agent_cli/memory/client.py +272 -0
  110. agent_cli/memory/engine.py +361 -0
  111. agent_cli/memory/entities.py +43 -0
  112. agent_cli/memory/models.py +112 -0
  113. agent_cli/opts.py +433 -0
  114. agent_cli/py.typed +0 -0
  115. agent_cli/rag/__init__.py +3 -0
  116. agent_cli/rag/_indexer.py +67 -0
  117. agent_cli/rag/_indexing.py +226 -0
  118. agent_cli/rag/_prompt.py +30 -0
  119. agent_cli/rag/_retriever.py +156 -0
  120. agent_cli/rag/_store.py +48 -0
  121. agent_cli/rag/_utils.py +218 -0
  122. agent_cli/rag/api.py +175 -0
  123. agent_cli/rag/client.py +299 -0
  124. agent_cli/rag/engine.py +302 -0
  125. agent_cli/rag/models.py +55 -0
  126. agent_cli/scripts/.runtime/.gitkeep +0 -0
  127. agent_cli/scripts/__init__.py +1 -0
  128. agent_cli/scripts/check_plugin_skill_sync.py +50 -0
  129. agent_cli/scripts/linux-hotkeys/README.md +63 -0
  130. agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh +45 -0
  131. agent_cli/scripts/linux-hotkeys/toggle-transcription.sh +58 -0
  132. agent_cli/scripts/linux-hotkeys/toggle-voice-edit.sh +58 -0
  133. agent_cli/scripts/macos-hotkeys/README.md +45 -0
  134. agent_cli/scripts/macos-hotkeys/skhd-config-example +5 -0
  135. agent_cli/scripts/macos-hotkeys/toggle-autocorrect.sh +12 -0
  136. agent_cli/scripts/macos-hotkeys/toggle-transcription.sh +37 -0
  137. agent_cli/scripts/macos-hotkeys/toggle-voice-edit.sh +37 -0
  138. agent_cli/scripts/nvidia-asr-server/README.md +99 -0
  139. agent_cli/scripts/nvidia-asr-server/pyproject.toml +27 -0
  140. agent_cli/scripts/nvidia-asr-server/server.py +255 -0
  141. agent_cli/scripts/nvidia-asr-server/shell.nix +32 -0
  142. agent_cli/scripts/nvidia-asr-server/uv.lock +4654 -0
  143. agent_cli/scripts/run-openwakeword.sh +11 -0
  144. agent_cli/scripts/run-piper-windows.ps1 +30 -0
  145. agent_cli/scripts/run-piper.sh +24 -0
  146. agent_cli/scripts/run-whisper-linux.sh +40 -0
  147. agent_cli/scripts/run-whisper-macos.sh +6 -0
  148. agent_cli/scripts/run-whisper-windows.ps1 +51 -0
  149. agent_cli/scripts/run-whisper.sh +9 -0
  150. agent_cli/scripts/run_faster_whisper_server.py +136 -0
  151. agent_cli/scripts/setup-linux-hotkeys.sh +72 -0
  152. agent_cli/scripts/setup-linux.sh +108 -0
  153. agent_cli/scripts/setup-macos-hotkeys.sh +61 -0
  154. agent_cli/scripts/setup-macos.sh +76 -0
  155. agent_cli/scripts/setup-windows.ps1 +63 -0
  156. agent_cli/scripts/start-all-services-windows.ps1 +53 -0
  157. agent_cli/scripts/start-all-services.sh +178 -0
  158. agent_cli/scripts/sync_extras.py +138 -0
  159. agent_cli/server/__init__.py +3 -0
  160. agent_cli/server/cli.py +721 -0
  161. agent_cli/server/common.py +222 -0
  162. agent_cli/server/model_manager.py +288 -0
  163. agent_cli/server/model_registry.py +225 -0
  164. agent_cli/server/proxy/__init__.py +3 -0
  165. agent_cli/server/proxy/api.py +444 -0
  166. agent_cli/server/streaming.py +67 -0
  167. agent_cli/server/tts/__init__.py +3 -0
  168. agent_cli/server/tts/api.py +335 -0
  169. agent_cli/server/tts/backends/__init__.py +82 -0
  170. agent_cli/server/tts/backends/base.py +139 -0
  171. agent_cli/server/tts/backends/kokoro.py +403 -0
  172. agent_cli/server/tts/backends/piper.py +253 -0
  173. agent_cli/server/tts/model_manager.py +201 -0
  174. agent_cli/server/tts/model_registry.py +28 -0
  175. agent_cli/server/tts/wyoming_handler.py +249 -0
  176. agent_cli/server/whisper/__init__.py +3 -0
  177. agent_cli/server/whisper/api.py +413 -0
  178. agent_cli/server/whisper/backends/__init__.py +89 -0
  179. agent_cli/server/whisper/backends/base.py +97 -0
  180. agent_cli/server/whisper/backends/faster_whisper.py +225 -0
  181. agent_cli/server/whisper/backends/mlx.py +270 -0
  182. agent_cli/server/whisper/languages.py +116 -0
  183. agent_cli/server/whisper/model_manager.py +157 -0
  184. agent_cli/server/whisper/model_registry.py +28 -0
  185. agent_cli/server/whisper/wyoming_handler.py +203 -0
  186. agent_cli/services/__init__.py +343 -0
  187. agent_cli/services/_wyoming_utils.py +64 -0
  188. agent_cli/services/asr.py +506 -0
  189. agent_cli/services/llm.py +228 -0
  190. agent_cli/services/tts.py +450 -0
  191. agent_cli/services/wake_word.py +142 -0
  192. agent_cli-0.70.5.dist-info/METADATA +2118 -0
  193. agent_cli-0.70.5.dist-info/RECORD +196 -0
  194. agent_cli-0.70.5.dist-info/WHEEL +4 -0
  195. agent_cli-0.70.5.dist-info/entry_points.txt +4 -0
  196. agent_cli-0.70.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,136 @@
1
+ r"""Common functionalities for voice-based agents."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ import time
7
+ from typing import TYPE_CHECKING
8
+
9
+ from agent_cli.core.utils import print_input_panel, print_with_style
10
+ from agent_cli.services import asr
11
+ from agent_cli.services.llm import process_and_update_clipboard
12
+ from agent_cli.services.tts import handle_tts_playback
13
+
14
+ if TYPE_CHECKING:
15
+ from rich.live import Live
16
+
17
+ from agent_cli import config
18
+
19
+ LOGGER = logging.getLogger()
20
+
21
+
22
async def get_instruction_from_audio(
    *,
    audio_data: bytes,
    provider_cfg: config.ProviderSelection,
    audio_input_cfg: config.AudioInput,
    wyoming_asr_cfg: config.WyomingASR,
    openai_asr_cfg: config.OpenAIASR,
    gemini_asr_cfg: config.GeminiASR,
    ollama_cfg: config.Ollama,
    logger: logging.Logger,
    quiet: bool,
) -> str | None:
    """Run ASR over a recorded audio buffer and return the spoken instruction.

    The transcriber callable is obtained from
    ``asr.create_recorded_audio_transcriber(provider_cfg)`` and awaited with
    the audio bytes plus every ASR-related config object.

    Returns:
        The transcript exactly as the transcriber produced it, or ``None``
        when the transcript is empty / whitespace-only, or when anything in
        this function raises (the exception is logged, never propagated).
    """
    try:
        started_at = time.monotonic()
        transcribe = asr.create_recorded_audio_transcriber(provider_cfg)
        transcript = await transcribe(
            audio_data=audio_data,
            provider_cfg=provider_cfg,
            audio_input_cfg=audio_input_cfg,
            wyoming_asr_cfg=wyoming_asr_cfg,
            openai_asr_cfg=openai_asr_cfg,
            gemini_asr_cfg=gemini_asr_cfg,
            ollama_cfg=ollama_cfg,
            logger=logger,
            quiet=quiet,
        )
        duration = time.monotonic() - started_at

        # Success path first: a transcript with at least one non-space char.
        if transcript and transcript.strip():
            if not quiet:
                print_input_panel(
                    transcript,
                    title="🎯 Instruction",
                    style="bold yellow",
                    subtitle=f"[dim]took {duration:.2f}s[/dim]",
                )
            return transcript

        # Nothing usable came back from the transcriber.
        if not quiet:
            print_with_style("No speech detected in recording", style="yellow")
        return None
    except Exception as exc:
        # Deliberately broad: a failure (including one raised while printing
        # above) is logged with its traceback and reported as "no instruction".
        logger.exception("Failed to process audio with ASR")
        if not quiet:
            print_with_style(f"ASR processing failed: {exc}", style="red")
        return None
74
+
75
+
76
async def process_instruction_and_respond(
    *,
    instruction: str,
    original_text: str,
    provider_cfg: config.ProviderSelection,
    general_cfg: config.General,
    ollama_cfg: config.Ollama,
    openai_llm_cfg: config.OpenAILLM,
    gemini_llm_cfg: config.GeminiLLM,
    audio_output_cfg: config.AudioOutput,
    wyoming_tts_cfg: config.WyomingTTS,
    openai_tts_cfg: config.OpenAITTS,
    kokoro_tts_cfg: config.KokoroTTS,
    gemini_tts_cfg: config.GeminiTTS | None = None,
    system_prompt: str,
    agent_instructions: str,
    live: Live | None,
    logger: logging.Logger,
) -> str | None:
    """Send the instruction through the LLM step and optionally speak the result.

    The LLM step (``process_and_update_clipboard``) only runs when
    ``general_cfg.clipboard`` is truthy. The TTS step
    (``handle_tts_playback``) only runs when ``audio_output_cfg.enable_tts``
    is truthy and the LLM step produced non-blank text.

    Returns:
        Whatever ``process_and_update_clipboard`` returned, or ``None`` when
        the LLM step was skipped.
    """
    llm_output: str | None = None

    # Step 1: LLM processing, gated on clipboard mode.
    if general_cfg.clipboard:
        llm_output = await process_and_update_clipboard(
            system_prompt=system_prompt,
            agent_instructions=agent_instructions,
            provider_cfg=provider_cfg,
            ollama_cfg=ollama_cfg,
            openai_cfg=openai_llm_cfg,
            gemini_cfg=gemini_llm_cfg,
            logger=logger,
            original_text=original_text,
            instruction=instruction,
            clipboard=general_cfg.clipboard,
            quiet=general_cfg.quiet,
            live=live,
        )

    # Step 2: speak the answer, but only if TTS is on and there is text to say.
    if audio_output_cfg.enable_tts and llm_output and llm_output.strip():
        target_file = general_cfg.save_file
        await handle_tts_playback(
            text=llm_output,
            provider_cfg=provider_cfg,
            audio_output_cfg=audio_output_cfg,
            wyoming_tts_cfg=wyoming_tts_cfg,
            openai_tts_cfg=openai_tts_cfg,
            kokoro_tts_cfg=kokoro_tts_cfg,
            gemini_tts_cfg=gemini_tts_cfg,
            save_file=target_file,
            quiet=general_cfg.quiet,
            logger=logger,
            # Playback is suppressed when the audio is being saved to a file.
            play_audio=not target_file,
            status_message="🔊 Speaking response...",
            description="TTS audio",
            live=live,
        )

    return llm_output
@@ -0,0 +1,383 @@
1
+ r"""Wake word-based voice assistant that records when wake word is detected.
2
+
3
+ This agent uses Wyoming wake word detection to implement a hands-free voice assistant that:
4
+ 1. Continuously listens for a wake word
5
+ 2. When the wake word is detected, starts recording user speech
6
+ 3. When the wake word is detected again, stops recording and processes the speech
7
+ 4. Sends the recorded speech to ASR for transcription
8
+ 5. Optionally processes the transcript with an LLM and speaks the response
9
+
10
+ WORKFLOW:
11
+ 1. Agent starts listening for the specified wake word
12
+ 2. First wake word detection -> start recording user speech
13
+ 3. Second wake word detection -> stop recording and process the speech
14
+ 4. Transcribe the recorded speech using Wyoming ASR
15
+ 5. Optionally process with LLM and respond with TTS
16
+
17
+ USAGE:
18
+ - Start the agent: assistant --wake-word "ok_nabu" --input-device-index 1
19
+ - The agent runs continuously until stopped with Ctrl+C or --stop
20
+ - Uses background process management for daemon-like operation
21
+
22
+ REQUIREMENTS:
23
+ - Wyoming wake word server (e.g., wyoming-openwakeword)
24
+ - Wyoming ASR server (e.g., wyoming-whisper)
25
+ - Optional: Wyoming TTS server for responses
26
+ """
27
+
28
+ from __future__ import annotations
29
+
30
+ import asyncio
31
+ import logging
32
+ from contextlib import suppress
33
+ from pathlib import Path # noqa: TC003
34
+ from typing import TYPE_CHECKING
35
+
36
+ from agent_cli import config, opts
37
+ from agent_cli.agents._voice_agent_common import (
38
+ get_instruction_from_audio,
39
+ process_instruction_and_respond,
40
+ )
41
+ from agent_cli.cli import app
42
+ from agent_cli.core import audio, process
43
+ from agent_cli.core.audio import setup_devices
44
+ from agent_cli.core.deps import requires_extras
45
+ from agent_cli.core.utils import (
46
+ InteractiveStopEvent,
47
+ maybe_live,
48
+ print_command_line_args,
49
+ print_with_style,
50
+ setup_logging,
51
+ signal_handling_context,
52
+ stop_or_status_or_toggle,
53
+ )
54
+ from agent_cli.services import asr
55
+ from agent_cli.services.wake_word import create_wake_word_detector
56
+
57
+ if TYPE_CHECKING:
58
+ import sounddevice as sd
59
+ from rich.live import Live
60
+
61
+ LOGGER = logging.getLogger()
62
+
63
# Maps a wake word identifier (as stored in the wake word config) to the
# textual renderings of it that may show up in a transcript. The list for the
# active wake word is joined with ", " and substituted into the prompts below.
WAKE_WORD_VARIATIONS: dict[str, list[str]] = {
    "ok_nabu": [
        "ok nabu",
        "okay nabu",
        "okay, nabu",
        "ok, nabu",
        "ok naboo",
        "okay naboo",
    ],
    "alexa": [
        "alexa",
    ],
    "hey_jarvis": [
        "hey jarvis",
    ],
}

# LLM prompt templates for the wake word assistant.
# Both templates expect two ``str.format`` fields: ``wake_word`` and ``variations``.

# System prompt: sets the assistant persona and tells the model to ignore the
# wake word that ends up in the transcription.
SYSTEM_PROMPT_TEMPLATE = """\
You are a helpful voice assistant. Respond to user questions and commands in a conversational, friendly manner.

The user is using a wake word to start and stop the recording, so the wake word will always appear at the END of the transcription.
The wake word is "{wake_word}". You should ignore the wake word and any variations of it (e.g., "{variations}") when processing the user's command.

Keep your responses concise but informative. If the user asks you to perform an action that requires external tools or systems, explain what you would do if you had access to those capabilities.

Always be helpful, accurate, and engaging in your responses.
"""

# Per-request instructions: how to answer questions, commands and unclear input.
AGENT_INSTRUCTIONS_TEMPLATE = """\
The user has spoken a voice command or question. The user is using a wake word to start and stop the recording. The wake word is "{wake_word}". You should ignore the wake word and any variations of it (e.g., "{variations}") when processing the user's command.

Provide a helpful, conversational response.

If it's a question, answer it clearly and concisely.
If it's a command, explain what you would do or provide guidance on how to accomplish it.
If it's unclear, ask for clarification in a friendly way.

Respond as if you're having a natural conversation.
"""
92
+
93
+
94
async def _record_audio_with_wake_word(
    stream: sd.InputStream,
    stop_event: InteractiveStopEvent,
    logger: logging.Logger,
    *,
    wake_word_cfg: config.WakeWord,
    quiet: bool = False,
    live: Live | None = None,
) -> bytes | None:
    """Capture one utterance delimited by two wake word detections.

    The input stream is fanned out through ``audio.tee_audio_stream``. One
    queue feeds the wake word detector for both the "start" and the "stop"
    detection; a second queue, added only after the first detection, feeds
    ``asr.record_audio_to_buffer``.

    Returns:
        The recorded audio bytes, or ``None`` if either detection yielded
        nothing or ``stop_event`` was set by the time a detection returned.
    """
    verbose = not quiet
    if verbose:
        print_with_style(
            f"👂 Listening for wake word: [bold yellow]{wake_word_cfg.wake_word}[/bold yellow]",
        )
        print_with_style(
            "Say the wake word to start recording, then say it again to stop and process.",
            style="dim",
        )

    async with audio.tee_audio_stream(stream, stop_event, logger) as splitter:
        # Queue consumed by the detector for both detections.
        detection_queue = await splitter.add_queue()
        detect = create_wake_word_detector(wake_word_cfg)

        # --- Phase 1: wait for the wake word that starts the recording ---
        start_word = await detect(
            logger=logger,
            queue=detection_queue,
            quiet=quiet,
            live=live,
        )
        may_record = bool(start_word) and not stop_event.is_set()
        if not may_record:
            # Leaving early: drop the detection queue before returning.
            await splitter.remove_queue(detection_queue)
            return None

        if verbose:
            print_with_style(
                f"✅ Wake word '{start_word}' detected! Starting recording...",
                style="green",
            )

        # --- Phase 2: record in the background until the wake word recurs ---
        capture_queue = await splitter.add_queue()
        capture_task = asyncio.create_task(
            asr.record_audio_to_buffer(capture_queue, logger),
        )

        stop_word = await detect(
            logger=logger,
            queue=detection_queue,
            quiet=quiet,
            live=live,
            progress_message="Recording... (say wake word to stop)",
        )

        # Removing the capture queue is what stops the recording task, so it
        # must happen before the task is awaited.
        await splitter.remove_queue(capture_queue)
        captured = await capture_task

        # The detection queue is no longer needed either.
        await splitter.remove_queue(detection_queue)

        if stop_word and not stop_event.is_set():
            if verbose:
                print_with_style(
                    f"🛑 Wake word '{stop_word}' detected! Stopping recording...",
                    style="yellow",
                )
            return captured

        # No stop word (or a stop request arrived): discard the recording.
        return None
166
+
167
+
168
async def _async_main(
    *,
    provider_cfg: config.ProviderSelection,
    general_cfg: config.General,
    audio_in_cfg: config.AudioInput,
    wyoming_asr_cfg: config.WyomingASR,
    openai_asr_cfg: config.OpenAIASR,
    gemini_asr_cfg: config.GeminiASR,
    ollama_cfg: config.Ollama,
    openai_llm_cfg: config.OpenAILLM,
    gemini_llm_cfg: config.GeminiLLM,
    audio_out_cfg: config.AudioOutput,
    wyoming_tts_cfg: config.WyomingTTS,
    openai_tts_cfg: config.OpenAITTS,
    kokoro_tts_cfg: config.KokoroTTS,
    gemini_tts_cfg: config.GeminiTTS,
    wake_word_cfg: config.WakeWord,
    system_prompt: str,
    agent_instructions: str,
    live: Live | None,
) -> None:
    """Core asynchronous logic for the wake word assistant.

    Resolves the audio devices, opens the input stream, and then loops until
    the stop event is set: record one wake-word-delimited utterance,
    transcribe it, and pass the instruction to the LLM/TTS step.

    Returns early (doing nothing) when ``setup_devices`` returns ``None``.
    Note that the resolved device indices are written back onto
    ``audio_in_cfg`` and ``audio_out_cfg``.
    """
    device_info = setup_devices(general_cfg, audio_in_cfg, audio_out_cfg)
    if device_info is None:
        return
    input_device_index, _, tts_output_device_index = device_info
    audio_in_cfg.input_device_index = input_device_index
    audio_out_cfg.output_device_index = tts_output_device_index

    stream_config = audio.setup_input_stream(input_device_index)
    with (
        audio.open_audio_stream(stream_config) as stream,
        signal_handling_context(LOGGER, general_cfg.quiet) as stop_event,
    ):
        while not stop_event.is_set():
            audio_data = await _record_audio_with_wake_word(
                stream,
                stop_event,
                LOGGER,
                wake_word_cfg=wake_word_cfg,
                quiet=general_cfg.quiet,
                live=live,
            )

            # The recorder returns None when a stop was requested. Check for
            # that first so shutdown is not reported as "No audio recorded".
            if stop_event.is_set():
                break

            if not audio_data:
                if not general_cfg.quiet:
                    print_with_style("No audio recorded", style="yellow")
                continue

            instruction = await get_instruction_from_audio(
                audio_data=audio_data,
                provider_cfg=provider_cfg,
                audio_input_cfg=audio_in_cfg,
                wyoming_asr_cfg=wyoming_asr_cfg,
                openai_asr_cfg=openai_asr_cfg,
                gemini_asr_cfg=gemini_asr_cfg,
                ollama_cfg=ollama_cfg,
                logger=LOGGER,
                quiet=general_cfg.quiet,
            )
            if not instruction:
                # Empty transcript or ASR failure: go back to listening.
                continue

            await process_instruction_and_respond(
                instruction=instruction,
                # This agent has no pre-existing text to edit.
                original_text="",
                provider_cfg=provider_cfg,
                general_cfg=general_cfg,
                ollama_cfg=ollama_cfg,
                openai_llm_cfg=openai_llm_cfg,
                gemini_llm_cfg=gemini_llm_cfg,
                audio_output_cfg=audio_out_cfg,
                wyoming_tts_cfg=wyoming_tts_cfg,
                openai_tts_cfg=openai_tts_cfg,
                kokoro_tts_cfg=kokoro_tts_cfg,
                gemini_tts_cfg=gemini_tts_cfg,
                system_prompt=system_prompt,
                agent_instructions=agent_instructions,
                live=live,
                logger=LOGGER,
            )

            if not general_cfg.quiet:
                print_with_style("✨ Ready for next command...", style="green")
255
+
256
+
257
# NOTE(review): the docstring below is presumably surfaced as the command's
# CLI help text by the `app.command` decorator, so it is kept verbatim and
# implementation notes live in `#` comments instead.
@app.command("assistant", rich_help_panel="Voice Commands")
@requires_extras("audio", "llm")
def assistant(
    *,
    # --- Provider Selection ---
    asr_provider: str = opts.ASR_PROVIDER,
    llm_provider: str = opts.LLM_PROVIDER,
    tts_provider: str = opts.TTS_PROVIDER,
    # --- Wake Word Configuration ---
    wake_server_ip: str = opts.WAKE_SERVER_IP,
    wake_server_port: int = opts.WAKE_SERVER_PORT,
    wake_word: str = opts.WAKE_WORD,
    # --- ASR (Audio) Configuration ---
    input_device_index: int | None = opts.INPUT_DEVICE_INDEX,
    input_device_name: str | None = opts.INPUT_DEVICE_NAME,
    asr_wyoming_ip: str = opts.ASR_WYOMING_IP,
    asr_wyoming_port: int = opts.ASR_WYOMING_PORT,
    asr_openai_model: str = opts.ASR_OPENAI_MODEL,
    asr_gemini_model: str = opts.ASR_GEMINI_MODEL,
    # --- LLM Configuration ---
    llm_ollama_model: str = opts.LLM_OLLAMA_MODEL,
    llm_ollama_host: str = opts.LLM_OLLAMA_HOST,
    llm_openai_model: str = opts.LLM_OPENAI_MODEL,
    openai_api_key: str | None = opts.OPENAI_API_KEY,
    openai_base_url: str | None = opts.OPENAI_BASE_URL,
    llm_gemini_model: str = opts.LLM_GEMINI_MODEL,
    gemini_api_key: str | None = opts.GEMINI_API_KEY,
    # --- TTS Configuration ---
    enable_tts: bool = opts.ENABLE_TTS,
    output_device_index: int | None = opts.OUTPUT_DEVICE_INDEX,
    output_device_name: str | None = opts.OUTPUT_DEVICE_NAME,
    tts_speed: float = opts.TTS_SPEED,
    tts_wyoming_ip: str = opts.TTS_WYOMING_IP,
    tts_wyoming_port: int = opts.TTS_WYOMING_PORT,
    tts_wyoming_voice: str | None = opts.TTS_WYOMING_VOICE,
    tts_wyoming_language: str | None = opts.TTS_WYOMING_LANGUAGE,
    tts_wyoming_speaker: str | None = opts.TTS_WYOMING_SPEAKER,
    tts_openai_model: str = opts.TTS_OPENAI_MODEL,
    tts_openai_voice: str = opts.TTS_OPENAI_VOICE,
    tts_openai_base_url: str | None = opts.TTS_OPENAI_BASE_URL,
    tts_kokoro_model: str = opts.TTS_KOKORO_MODEL,
    tts_kokoro_voice: str = opts.TTS_KOKORO_VOICE,
    tts_kokoro_host: str = opts.TTS_KOKORO_HOST,
    tts_gemini_model: str = opts.TTS_GEMINI_MODEL,
    tts_gemini_voice: str = opts.TTS_GEMINI_VOICE,
    # --- Process Management ---
    stop: bool = opts.STOP,
    status: bool = opts.STATUS,
    toggle: bool = opts.TOGGLE,
    # --- General Options ---
    save_file: Path | None = opts.SAVE_FILE,
    clipboard: bool = opts.CLIPBOARD,
    log_level: opts.LogLevel = opts.LOG_LEVEL,
    log_file: str | None = opts.LOG_FILE,
    list_devices: bool = opts.LIST_DEVICES,
    quiet: bool = opts.QUIET,
    config_file: str | None = opts.CONFIG_FILE,
    print_args: bool = opts.PRINT_ARGS,
) -> None:
    """Wake word-based voice assistant using local or remote services."""
    # Must stay the first statement: locals() here is exactly the parameters.
    if print_args:
        print_command_line_args(locals())

    setup_logging(log_level, log_file, quiet=quiet)
    general_cfg = config.General(
        log_level=log_level,
        log_file=log_file,
        quiet=quiet,
        list_devices=list_devices,
        clipboard=clipboard,
        save_file=save_file,
    )
    process_name = "assistant"
    # --stop / --status / --toggle are handled here; a truthy result means the
    # request was served and the assistant itself should not start.
    if stop_or_status_or_toggle(
        process_name,
        "wake word assistant",
        stop,
        status,
        toggle,
        quiet=general_cfg.quiet,
    ):
        return

    with (
        process.pid_file_context(process_name),
        suppress(KeyboardInterrupt),
        maybe_live(not general_cfg.quiet) as live,
    ):
        # The provider configs are built from this function's local names, so
        # parameter and local names above must not be renamed casually.
        cfgs = config.create_provider_configs_from_locals(locals())
        wake_word_cfg = config.WakeWord(
            wake_server_ip=wake_server_ip,
            wake_server_port=wake_server_port,
            wake_word=wake_word,
        )

        # Wake words without a curated entry used to render an empty example
        # (`e.g., ""`) in both prompts. Fall back to the identifier with
        # underscores turned into spaces so the example is never empty.
        variations = ", ".join(
            WAKE_WORD_VARIATIONS.get(
                wake_word_cfg.wake_word,
                [wake_word_cfg.wake_word.replace("_", " ")],
            ),
        )
        system_prompt = SYSTEM_PROMPT_TEMPLATE.format(
            wake_word=wake_word_cfg.wake_word,
            variations=variations,
        )
        agent_instructions = AGENT_INSTRUCTIONS_TEMPLATE.format(
            wake_word=wake_word_cfg.wake_word,
            variations=variations,
        )

        asyncio.run(
            _async_main(
                provider_cfg=cfgs.provider,
                general_cfg=general_cfg,
                audio_in_cfg=cfgs.audio_in,
                wyoming_asr_cfg=cfgs.wyoming_asr,
                openai_asr_cfg=cfgs.openai_asr,
                gemini_asr_cfg=cfgs.gemini_asr,
                ollama_cfg=cfgs.ollama,
                openai_llm_cfg=cfgs.openai_llm,
                gemini_llm_cfg=cfgs.gemini_llm,
                audio_out_cfg=cfgs.audio_out,
                wyoming_tts_cfg=cfgs.wyoming_tts,
                openai_tts_cfg=cfgs.openai_tts,
                kokoro_tts_cfg=cfgs.kokoro_tts,
                gemini_tts_cfg=cfgs.gemini_tts,
                wake_word_cfg=wake_word_cfg,
                system_prompt=system_prompt,
                agent_instructions=agent_instructions,
                live=live,
            ),
        )