agent-cli 0.70.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. agent_cli/__init__.py +5 -0
  2. agent_cli/__main__.py +6 -0
  3. agent_cli/_extras.json +14 -0
  4. agent_cli/_requirements/.gitkeep +0 -0
  5. agent_cli/_requirements/audio.txt +79 -0
  6. agent_cli/_requirements/faster-whisper.txt +215 -0
  7. agent_cli/_requirements/kokoro.txt +425 -0
  8. agent_cli/_requirements/llm.txt +183 -0
  9. agent_cli/_requirements/memory.txt +355 -0
  10. agent_cli/_requirements/mlx-whisper.txt +222 -0
  11. agent_cli/_requirements/piper.txt +176 -0
  12. agent_cli/_requirements/rag.txt +402 -0
  13. agent_cli/_requirements/server.txt +154 -0
  14. agent_cli/_requirements/speed.txt +77 -0
  15. agent_cli/_requirements/vad.txt +155 -0
  16. agent_cli/_requirements/wyoming.txt +71 -0
  17. agent_cli/_tools.py +368 -0
  18. agent_cli/agents/__init__.py +23 -0
  19. agent_cli/agents/_voice_agent_common.py +136 -0
  20. agent_cli/agents/assistant.py +383 -0
  21. agent_cli/agents/autocorrect.py +284 -0
  22. agent_cli/agents/chat.py +496 -0
  23. agent_cli/agents/memory/__init__.py +31 -0
  24. agent_cli/agents/memory/add.py +190 -0
  25. agent_cli/agents/memory/proxy.py +160 -0
  26. agent_cli/agents/rag_proxy.py +128 -0
  27. agent_cli/agents/speak.py +209 -0
  28. agent_cli/agents/transcribe.py +671 -0
  29. agent_cli/agents/transcribe_daemon.py +499 -0
  30. agent_cli/agents/voice_edit.py +291 -0
  31. agent_cli/api.py +22 -0
  32. agent_cli/cli.py +106 -0
  33. agent_cli/config.py +503 -0
  34. agent_cli/config_cmd.py +307 -0
  35. agent_cli/constants.py +27 -0
  36. agent_cli/core/__init__.py +1 -0
  37. agent_cli/core/audio.py +461 -0
  38. agent_cli/core/audio_format.py +299 -0
  39. agent_cli/core/chroma.py +88 -0
  40. agent_cli/core/deps.py +191 -0
  41. agent_cli/core/openai_proxy.py +139 -0
  42. agent_cli/core/process.py +195 -0
  43. agent_cli/core/reranker.py +120 -0
  44. agent_cli/core/sse.py +87 -0
  45. agent_cli/core/transcription_logger.py +70 -0
  46. agent_cli/core/utils.py +526 -0
  47. agent_cli/core/vad.py +175 -0
  48. agent_cli/core/watch.py +65 -0
  49. agent_cli/dev/__init__.py +14 -0
  50. agent_cli/dev/cli.py +1588 -0
  51. agent_cli/dev/coding_agents/__init__.py +19 -0
  52. agent_cli/dev/coding_agents/aider.py +24 -0
  53. agent_cli/dev/coding_agents/base.py +167 -0
  54. agent_cli/dev/coding_agents/claude.py +39 -0
  55. agent_cli/dev/coding_agents/codex.py +24 -0
  56. agent_cli/dev/coding_agents/continue_dev.py +15 -0
  57. agent_cli/dev/coding_agents/copilot.py +24 -0
  58. agent_cli/dev/coding_agents/cursor_agent.py +48 -0
  59. agent_cli/dev/coding_agents/gemini.py +28 -0
  60. agent_cli/dev/coding_agents/opencode.py +15 -0
  61. agent_cli/dev/coding_agents/registry.py +49 -0
  62. agent_cli/dev/editors/__init__.py +19 -0
  63. agent_cli/dev/editors/base.py +89 -0
  64. agent_cli/dev/editors/cursor.py +15 -0
  65. agent_cli/dev/editors/emacs.py +46 -0
  66. agent_cli/dev/editors/jetbrains.py +56 -0
  67. agent_cli/dev/editors/nano.py +31 -0
  68. agent_cli/dev/editors/neovim.py +33 -0
  69. agent_cli/dev/editors/registry.py +59 -0
  70. agent_cli/dev/editors/sublime.py +20 -0
  71. agent_cli/dev/editors/vim.py +42 -0
  72. agent_cli/dev/editors/vscode.py +15 -0
  73. agent_cli/dev/editors/zed.py +20 -0
  74. agent_cli/dev/project.py +568 -0
  75. agent_cli/dev/registry.py +52 -0
  76. agent_cli/dev/skill/SKILL.md +141 -0
  77. agent_cli/dev/skill/examples.md +571 -0
  78. agent_cli/dev/terminals/__init__.py +19 -0
  79. agent_cli/dev/terminals/apple_terminal.py +82 -0
  80. agent_cli/dev/terminals/base.py +56 -0
  81. agent_cli/dev/terminals/gnome.py +51 -0
  82. agent_cli/dev/terminals/iterm2.py +84 -0
  83. agent_cli/dev/terminals/kitty.py +77 -0
  84. agent_cli/dev/terminals/registry.py +48 -0
  85. agent_cli/dev/terminals/tmux.py +58 -0
  86. agent_cli/dev/terminals/warp.py +132 -0
  87. agent_cli/dev/terminals/zellij.py +78 -0
  88. agent_cli/dev/worktree.py +856 -0
  89. agent_cli/docs_gen.py +417 -0
  90. agent_cli/example-config.toml +185 -0
  91. agent_cli/install/__init__.py +5 -0
  92. agent_cli/install/common.py +89 -0
  93. agent_cli/install/extras.py +174 -0
  94. agent_cli/install/hotkeys.py +48 -0
  95. agent_cli/install/services.py +87 -0
  96. agent_cli/memory/__init__.py +7 -0
  97. agent_cli/memory/_files.py +250 -0
  98. agent_cli/memory/_filters.py +63 -0
  99. agent_cli/memory/_git.py +157 -0
  100. agent_cli/memory/_indexer.py +142 -0
  101. agent_cli/memory/_ingest.py +408 -0
  102. agent_cli/memory/_persistence.py +182 -0
  103. agent_cli/memory/_prompt.py +91 -0
  104. agent_cli/memory/_retrieval.py +294 -0
  105. agent_cli/memory/_store.py +169 -0
  106. agent_cli/memory/_streaming.py +44 -0
  107. agent_cli/memory/_tasks.py +48 -0
  108. agent_cli/memory/api.py +113 -0
  109. agent_cli/memory/client.py +272 -0
  110. agent_cli/memory/engine.py +361 -0
  111. agent_cli/memory/entities.py +43 -0
  112. agent_cli/memory/models.py +112 -0
  113. agent_cli/opts.py +433 -0
  114. agent_cli/py.typed +0 -0
  115. agent_cli/rag/__init__.py +3 -0
  116. agent_cli/rag/_indexer.py +67 -0
  117. agent_cli/rag/_indexing.py +226 -0
  118. agent_cli/rag/_prompt.py +30 -0
  119. agent_cli/rag/_retriever.py +156 -0
  120. agent_cli/rag/_store.py +48 -0
  121. agent_cli/rag/_utils.py +218 -0
  122. agent_cli/rag/api.py +175 -0
  123. agent_cli/rag/client.py +299 -0
  124. agent_cli/rag/engine.py +302 -0
  125. agent_cli/rag/models.py +55 -0
  126. agent_cli/scripts/.runtime/.gitkeep +0 -0
  127. agent_cli/scripts/__init__.py +1 -0
  128. agent_cli/scripts/check_plugin_skill_sync.py +50 -0
  129. agent_cli/scripts/linux-hotkeys/README.md +63 -0
  130. agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh +45 -0
  131. agent_cli/scripts/linux-hotkeys/toggle-transcription.sh +58 -0
  132. agent_cli/scripts/linux-hotkeys/toggle-voice-edit.sh +58 -0
  133. agent_cli/scripts/macos-hotkeys/README.md +45 -0
  134. agent_cli/scripts/macos-hotkeys/skhd-config-example +5 -0
  135. agent_cli/scripts/macos-hotkeys/toggle-autocorrect.sh +12 -0
  136. agent_cli/scripts/macos-hotkeys/toggle-transcription.sh +37 -0
  137. agent_cli/scripts/macos-hotkeys/toggle-voice-edit.sh +37 -0
  138. agent_cli/scripts/nvidia-asr-server/README.md +99 -0
  139. agent_cli/scripts/nvidia-asr-server/pyproject.toml +27 -0
  140. agent_cli/scripts/nvidia-asr-server/server.py +255 -0
  141. agent_cli/scripts/nvidia-asr-server/shell.nix +32 -0
  142. agent_cli/scripts/nvidia-asr-server/uv.lock +4654 -0
  143. agent_cli/scripts/run-openwakeword.sh +11 -0
  144. agent_cli/scripts/run-piper-windows.ps1 +30 -0
  145. agent_cli/scripts/run-piper.sh +24 -0
  146. agent_cli/scripts/run-whisper-linux.sh +40 -0
  147. agent_cli/scripts/run-whisper-macos.sh +6 -0
  148. agent_cli/scripts/run-whisper-windows.ps1 +51 -0
  149. agent_cli/scripts/run-whisper.sh +9 -0
  150. agent_cli/scripts/run_faster_whisper_server.py +136 -0
  151. agent_cli/scripts/setup-linux-hotkeys.sh +72 -0
  152. agent_cli/scripts/setup-linux.sh +108 -0
  153. agent_cli/scripts/setup-macos-hotkeys.sh +61 -0
  154. agent_cli/scripts/setup-macos.sh +76 -0
  155. agent_cli/scripts/setup-windows.ps1 +63 -0
  156. agent_cli/scripts/start-all-services-windows.ps1 +53 -0
  157. agent_cli/scripts/start-all-services.sh +178 -0
  158. agent_cli/scripts/sync_extras.py +138 -0
  159. agent_cli/server/__init__.py +3 -0
  160. agent_cli/server/cli.py +721 -0
  161. agent_cli/server/common.py +222 -0
  162. agent_cli/server/model_manager.py +288 -0
  163. agent_cli/server/model_registry.py +225 -0
  164. agent_cli/server/proxy/__init__.py +3 -0
  165. agent_cli/server/proxy/api.py +444 -0
  166. agent_cli/server/streaming.py +67 -0
  167. agent_cli/server/tts/__init__.py +3 -0
  168. agent_cli/server/tts/api.py +335 -0
  169. agent_cli/server/tts/backends/__init__.py +82 -0
  170. agent_cli/server/tts/backends/base.py +139 -0
  171. agent_cli/server/tts/backends/kokoro.py +403 -0
  172. agent_cli/server/tts/backends/piper.py +253 -0
  173. agent_cli/server/tts/model_manager.py +201 -0
  174. agent_cli/server/tts/model_registry.py +28 -0
  175. agent_cli/server/tts/wyoming_handler.py +249 -0
  176. agent_cli/server/whisper/__init__.py +3 -0
  177. agent_cli/server/whisper/api.py +413 -0
  178. agent_cli/server/whisper/backends/__init__.py +89 -0
  179. agent_cli/server/whisper/backends/base.py +97 -0
  180. agent_cli/server/whisper/backends/faster_whisper.py +225 -0
  181. agent_cli/server/whisper/backends/mlx.py +270 -0
  182. agent_cli/server/whisper/languages.py +116 -0
  183. agent_cli/server/whisper/model_manager.py +157 -0
  184. agent_cli/server/whisper/model_registry.py +28 -0
  185. agent_cli/server/whisper/wyoming_handler.py +203 -0
  186. agent_cli/services/__init__.py +343 -0
  187. agent_cli/services/_wyoming_utils.py +64 -0
  188. agent_cli/services/asr.py +506 -0
  189. agent_cli/services/llm.py +228 -0
  190. agent_cli/services/tts.py +450 -0
  191. agent_cli/services/wake_word.py +142 -0
  192. agent_cli-0.70.5.dist-info/METADATA +2118 -0
  193. agent_cli-0.70.5.dist-info/RECORD +196 -0
  194. agent_cli-0.70.5.dist-info/WHEEL +4 -0
  195. agent_cli-0.70.5.dist-info/entry_points.txt +4 -0
  196. agent_cli-0.70.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,506 @@
1
+ """Module for Automatic Speech Recognition using Wyoming or OpenAI."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import io
7
+ import wave
8
+ from datetime import UTC, datetime
9
+ from functools import partial
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING
12
+
13
+ from agent_cli import constants
14
+ from agent_cli.core.audio import (
15
+ open_audio_stream,
16
+ read_audio_stream,
17
+ read_from_queue,
18
+ setup_input_stream,
19
+ )
20
+ from agent_cli.core.audio_format import check_ffmpeg_available, convert_audio_to_wyoming_format
21
+ from agent_cli.core.utils import manage_send_receive_tasks
22
+ from agent_cli.services import (
23
+ transcribe_audio_gemini,
24
+ transcribe_audio_openai,
25
+ )
26
+ from agent_cli.services._wyoming_utils import wyoming_client_context
27
+
28
+ if TYPE_CHECKING:
29
+ import logging
30
+ from collections.abc import Awaitable, Callable
31
+
32
+ import sounddevice as sd
33
+ from rich.live import Live
34
+ from wyoming.client import AsyncClient
35
+
36
+ from agent_cli import config
37
+ from agent_cli.core.utils import InteractiveStopEvent
38
+
39
+
40
def _get_transcriptions_dir() -> Path:
    """Return the folder where transcription recordings are stored.

    The folder is ``~/.config/agent-cli/transcriptions``; it is created
    (including missing parents) when it does not exist yet.
    """
    recordings_root = Path.home().joinpath(".config", "agent-cli", "transcriptions")
    recordings_root.mkdir(parents=True, exist_ok=True)
    return recordings_root
45
+
46
+
47
def _save_audio_to_file(audio_data: bytes, logger: logging.Logger) -> Path | None:
    """Write audio frames to a timestamp-named WAV file in the transcriptions folder.

    Args:
        audio_data: Audio frames to write into the WAV container.
        logger: Logger used to report the saved path or the failure.

    Returns:
        The path of the written file, or None when an OSError occurred.

    """
    try:
        stamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S_%f")
        # Dropping the last three microsecond digits leaves millisecond precision.
        destination = _get_transcriptions_dir() / f"recording_{stamp[:-3]}.wav"

        with wave.open(str(destination), "wb") as writer:
            writer.setnchannels(constants.AUDIO_CHANNELS)
            writer.setsampwidth(constants.AUDIO_FORMAT_WIDTH)  # 16-bit audio
            writer.setframerate(constants.AUDIO_RATE)
            writer.writeframes(audio_data)

        logger.info("Saved audio recording to %s", destination)
        return destination
    except OSError:
        logger.exception("Failed to save audio recording")
        return None
68
+
69
+
70
def get_last_recording(index: int = 1) -> Path | None:
    """Return the path of one of the most recent saved recordings.

    Args:
        index: 1-based position counted from the newest recording
            (1 = most recent, 2 = the one before it, and so on).
            Defaults to 1.

    Returns:
        Path of the selected recording, or None when ``index`` is below 1
        or fewer than ``index`` recordings exist.

    """
    if index < 1:
        return None

    candidates = sorted(_get_transcriptions_dir().glob("recording_*.wav"))
    if index > len(candidates):
        return None

    # Negative indexing counts back from the last (newest-sorted) entry.
    return candidates[-index]
91
+
92
+
93
def _load_raw_audio(filepath: Path, logger: logging.Logger) -> bytes | None:
    """Read a file's bytes verbatim, without any decoding or conversion.

    Args:
        filepath: File to read.
        logger: Logger used to report the outcome.

    Returns:
        The file content, or None when reading raised an OSError.

    """
    try:
        payload = filepath.read_bytes()
    except OSError:
        logger.exception("Failed to read audio file %s", filepath)
        return None

    logger.info("Loaded raw audio from %s (%d bytes)", filepath, len(payload))
    return payload
102
+
103
+
104
def _load_wav_pcm(filepath: Path, logger: logging.Logger) -> bytes | None:
    """Extract the audio frames stored in a WAV file.

    The frames are returned exactly as stored; this function does not
    resample or change the channel layout.

    Args:
        filepath: WAV file to read.
        logger: Logger used to report the outcome.

    Returns:
        The frame bytes, or None when the file is missing, unreadable,
        empty/truncated, or not a valid WAV file.

    """
    try:
        with wave.open(str(filepath), "rb") as wav_file:
            audio_data = wav_file.readframes(wav_file.getnframes())
    except (OSError, EOFError, wave.Error):
        # wave.open signals an empty or truncated header with EOFError rather
        # than wave.Error, so it must be caught explicitly to honour the
        # "None on failure" contract.
        logger.exception("Failed to load audio from %s", filepath)
        return None

    logger.info("Loaded PCM audio from %s", filepath)
    return audio_data
114
+
115
+
116
def load_audio_from_file(
    filepath: Path,
    logger: logging.Logger,
    *,
    convert_to_pcm: bool = True,
) -> bytes | None:
    """Load audio data from a file, optionally converting it to PCM.

    WAV files have their frames extracted directly. Any other file is read
    in full and handed to the ffmpeg-based converter.

    Args:
        filepath: Path to the audio file.
        logger: Logger instance.
        convert_to_pcm: When False, skip all conversion and return the raw
            file bytes (for callers that handle decoding themselves).

    Returns:
        Audio data as bytes, or None if loading or conversion failed.

    """
    if not convert_to_pcm:
        # Caller wants the untouched file content.
        return _load_raw_audio(filepath, logger)

    is_wav = filepath.suffix.lower() == ".wav"
    if is_wav:
        return _load_wav_pcm(filepath, logger)

    # Everything else needs ffmpeg for the conversion.
    if check_ffmpeg_available():
        try:
            encoded = filepath.read_bytes()
            converted = convert_audio_to_wyoming_format(encoded, filepath.name)
            logger.info("Converted %s to PCM using ffmpeg", filepath)
            return converted
        except (OSError, RuntimeError):
            logger.exception("Failed to convert %s", filepath)
            return None

    logger.error("ffmpeg not found. Please install ffmpeg to transcribe non-WAV audio files.")
    return None
157
+
158
+
159
+ def create_transcriber(
160
+ provider_cfg: config.ProviderSelection,
161
+ audio_input_cfg: config.AudioInput,
162
+ wyoming_asr_cfg: config.WyomingASR,
163
+ openai_asr_cfg: config.OpenAIASR,
164
+ gemini_asr_cfg: config.GeminiASR | None = None,
165
+ ) -> Callable[..., Awaitable[str | None]]:
166
+ """Return the appropriate transcriber for live audio based on the provider."""
167
+ if provider_cfg.asr_provider == "wyoming":
168
+ # Wyoming has streaming support, uses its own implementation
169
+ return partial(
170
+ _transcribe_live_audio_wyoming,
171
+ audio_input_cfg=audio_input_cfg,
172
+ wyoming_asr_cfg=wyoming_asr_cfg,
173
+ )
174
+
175
+ # OpenAI and Gemini use the buffered record-then-transcribe pattern
176
+ if provider_cfg.asr_provider == "openai":
177
+ return partial(
178
+ _transcribe_live_audio_buffered,
179
+ audio_input_cfg=audio_input_cfg,
180
+ transcribe_fn=transcribe_audio_openai,
181
+ transcribe_cfg=openai_asr_cfg,
182
+ provider_name="OpenAI",
183
+ )
184
+ if provider_cfg.asr_provider == "gemini":
185
+ if gemini_asr_cfg is None:
186
+ msg = "Gemini ASR config is required when using gemini provider"
187
+ raise ValueError(msg)
188
+ return partial(
189
+ _transcribe_live_audio_buffered,
190
+ audio_input_cfg=audio_input_cfg,
191
+ transcribe_fn=transcribe_audio_gemini,
192
+ transcribe_cfg=gemini_asr_cfg,
193
+ provider_name="Gemini",
194
+ )
195
+ msg = f"Unsupported ASR provider: {provider_cfg.asr_provider}"
196
+ raise ValueError(msg)
197
+
198
+
199
+ def create_recorded_audio_transcriber(
200
+ provider_cfg: config.ProviderSelection,
201
+ ) -> Callable[..., Awaitable[str]]:
202
+ """Return the appropriate transcriber for recorded audio based on the provider."""
203
+ if provider_cfg.asr_provider == "openai":
204
+ return transcribe_audio_openai
205
+ if provider_cfg.asr_provider == "wyoming":
206
+ return _transcribe_recorded_audio_wyoming
207
+ if provider_cfg.asr_provider == "gemini":
208
+ return transcribe_audio_gemini
209
+ msg = f"Unsupported ASR provider: {provider_cfg.asr_provider}"
210
+ raise ValueError(msg)
211
+
212
+
213
async def _send_audio(
    client: AsyncClient,
    stream: sd.InputStream,
    stop_event: InteractiveStopEvent,
    logger: logging.Logger,
    *,
    live: Live,
    quiet: bool = False,
    save_recording: bool = True,
    initial_prompt: str | None = None,
) -> None:
    """Stream microphone audio to the Wyoming server, optionally keeping a copy.

    Sends a Transcribe event followed by AudioStart, forwards every chunk read
    from ``stream`` as an AudioChunk, and always finishes with AudioStop.

    Args:
        client: Connected Wyoming client the events are written to.
        stream: Open input stream to read audio from.
        stop_event: Event that ends the read loop.
        logger: Logger instance.
        live: Rich Live display passed through to the stream reader.
        quiet: Passed through to the stream reader.
        save_recording: If True, buffer all chunks and save them to disk
            once streaming has finished.
        initial_prompt: Optional prompt sent as the Transcribe context.

    """
    from wyoming.asr import Transcribe  # noqa: PLC0415
    from wyoming.audio import AudioChunk, AudioStart, AudioStop  # noqa: PLC0415

    # A context is only attached when a non-empty prompt was supplied.
    prompt_context = None
    if initial_prompt:
        prompt_context = {"initial_prompt": initial_prompt}
    await client.write_event(Transcribe(context=prompt_context).event())
    await client.write_event(AudioStart(**constants.WYOMING_AUDIO_CONFIG).event())

    # In-memory copy of everything sent, kept only when a recording is wanted.
    recorded = io.BytesIO() if save_recording else None

    async def forward_chunk(chunk: bytes) -> None:
        """Keep a copy of the chunk (if recording) and send it to the server."""
        if recorded is not None:
            recorded.write(chunk)
        await client.write_event(AudioChunk(audio=chunk, **constants.WYOMING_AUDIO_CONFIG).event())

    try:
        await read_audio_stream(
            stream=stream,
            stop_event=stop_event,
            chunk_handler=forward_chunk,
            logger=logger,
            live=live,
            quiet=quiet,
            progress_message="Listening",
            progress_style="blue",
        )
    finally:
        # The server is told the audio ended even when reading raised.
        await client.write_event(AudioStop().event())
        logger.debug("Sent AudioStop")

    if recorded is None:
        return

    # Persist the recording, skipping empty captures.
    captured = recorded.getvalue()
    if captured:
        _save_audio_to_file(captured, logger)
262
+
263
+
264
async def record_audio_to_buffer(queue: asyncio.Queue, logger: logging.Logger) -> bytes:
    """Collect the audio chunks read from a queue into one bytes object.

    Args:
        queue: Queue handed to ``read_from_queue`` as the chunk source.
        logger: Logger instance.

    Returns:
        All received chunks concatenated in arrival order.

    """
    collected = io.BytesIO()

    def append_chunk(chunk: bytes) -> None:
        """Append one chunk to the in-memory buffer."""
        collected.write(chunk)

    await read_from_queue(queue=queue, chunk_handler=append_chunk, logger=logger)
    return collected.getvalue()
275
+
276
+
277
async def _receive_transcript(
    client: AsyncClient,
    logger: logging.Logger,
    *,
    chunk_callback: Callable[[str], None] | None = None,
    final_callback: Callable[[str], None] | None = None,
) -> str:
    """Read events from the ASR server until the final transcript arrives.

    Args:
        client: Connected Wyoming client to read events from.
        logger: Logger instance.
        chunk_callback: Called with the text of every TranscriptChunk event.
        final_callback: Called with the text of the final Transcript event.

    Returns:
        The final transcript text, or an empty string when the connection
        ended (``read_event`` returned None) before a Transcript event.

    """
    from wyoming.asr import (  # noqa: PLC0415
        Transcript,
        TranscriptChunk,
        TranscriptStart,
        TranscriptStop,
    )

    while (event := await client.read_event()) is not None:
        kind = event.type

        if Transcript.is_type(kind):
            # The final transcript ends the exchange.
            final_text = Transcript.from_event(event).text
            logger.info("Final transcript: %s", final_text)
            if final_callback:
                final_callback(final_text)
            return final_text

        if TranscriptChunk.is_type(kind):
            piece = TranscriptChunk.from_event(event).text
            logger.debug("Transcript chunk: %s", piece)
            if chunk_callback:
                chunk_callback(piece)
        elif TranscriptStart.is_type(kind) or TranscriptStop.is_type(kind):
            logger.debug("Received %s", kind)
        else:
            logger.debug("Ignoring event type: %s", kind)

    # read_event() returned None: no further events will arrive.
    logger.warning("Connection to ASR server lost.")
    return ""
317
+
318
+
319
async def record_audio_with_manual_stop(
    input_device_index: int | None,
    stop_event: InteractiveStopEvent,
    logger: logging.Logger,
    *,
    quiet: bool = False,
    live: Live | None = None,
    save_recording: bool = True,
) -> bytes:
    """Capture audio into memory until the stop event ends the read loop.

    Args:
        input_device_index: Audio input device index.
        stop_event: Event that stops the recording.
        logger: Logger instance.
        quiet: If True, suppress console output.
        live: Rich Live display for progress.
        save_recording: If True, also save a non-empty recording to disk.

    Returns:
        The recorded audio data as bytes (possibly empty).

    """
    captured = io.BytesIO()

    def keep_chunk(chunk: bytes) -> None:
        """Append one chunk to the in-memory recording."""
        captured.write(chunk)

    with open_audio_stream(setup_input_stream(input_device_index)) as stream:
        await read_audio_stream(
            stream=stream,
            stop_event=stop_event,
            chunk_handler=keep_chunk,
            logger=logger,
            live=live,
            quiet=quiet,
            progress_message="Recording",
            progress_style="green",
        )

    pcm = captured.getvalue()

    # Empty captures are never written to disk.
    if pcm and save_recording:
        _save_audio_to_file(pcm, logger)

    return pcm
368
+
369
+
370
async def _transcribe_recorded_audio_wyoming(
    *,
    audio_data: bytes,
    wyoming_asr_cfg: config.WyomingASR,
    logger: logging.Logger,
    quiet: bool = False,
    extra_instructions: str | None = None,
    **_kwargs: object,
) -> str:
    """Transcribe pre-recorded audio with a Wyoming ASR server.

    Sends Transcribe and AudioStart, the audio split into fixed-size
    AudioChunk events, then AudioStop, and waits for the transcript.

    Args:
        audio_data: Audio bytes to send to the server.
        wyoming_asr_cfg: Wyoming connection settings and prompt configuration.
        logger: Logger instance.
        quiet: Passed through to the client context.
        extra_instructions: Extra text merged into the effective prompt.
        **_kwargs: Ignored; accepted so all transcribers share a call shape.

    Returns:
        The transcript text, or an empty string when the connection was
        refused or any other error occurred (this function never raises
        ``Exception`` subclasses to its caller).

    """
    from wyoming.asr import Transcribe  # noqa: PLC0415
    from wyoming.audio import AudioChunk, AudioStart, AudioStop  # noqa: PLC0415

    try:
        async with wyoming_client_context(
            wyoming_asr_cfg.asr_wyoming_ip,
            wyoming_asr_cfg.asr_wyoming_port,
            "ASR",
            logger,
            quiet=quiet,
        ) as client:
            # The effective prompt travels in the Transcribe context.
            effective_prompt = wyoming_asr_cfg.get_effective_prompt(extra_instructions)
            context = {"initial_prompt": effective_prompt} if effective_prompt else None
            await client.write_event(Transcribe(context=context).event())
            await client.write_event(AudioStart(**constants.WYOMING_AUDIO_CONFIG).event())

            # NOTE(review): the factor 2 presumably turns a chunk size in
            # samples into bytes for 16-bit audio — confirm against constants.
            chunk_size = constants.AUDIO_CHUNK_SIZE * 2
            for offset in range(0, len(audio_data), chunk_size):
                chunk = audio_data[offset : offset + chunk_size]
                await client.write_event(
                    AudioChunk(audio=chunk, **constants.WYOMING_AUDIO_CONFIG).event(),
                )
                logger.debug("Sent %d byte(s) of audio", len(chunk))

            await client.write_event(AudioStop().event())
            logger.debug("Sent AudioStop")

            return await _receive_transcript(client, logger)
    except ConnectionRefusedError:
        logger.warning("Failed to connect to Wyoming ASR server")
        return ""
    except Exception:
        # Best-effort boundary: callers still get "", but failures that are
        # not a refused connection are no longer reported as one and keep
        # their traceback in the log.
        logger.exception("Error during Wyoming ASR transcription")
        return ""
412
+
413
+
414
async def _transcribe_live_audio_wyoming(
    *,
    audio_input_cfg: config.AudioInput,
    wyoming_asr_cfg: config.WyomingASR,
    logger: logging.Logger,
    stop_event: InteractiveStopEvent,
    live: Live,
    quiet: bool = False,
    save_recording: bool = True,
    chunk_callback: Callable[[str], None] | None = None,
    final_callback: Callable[[str], None] | None = None,
    extra_instructions: str | None = None,
    **_kwargs: object,
) -> str | None:
    """Transcribe live microphone audio by streaming it to a Wyoming ASR server.

    Opens the input stream and runs the sender (``_send_audio``) and the
    receiver (``_receive_transcript``) together until both have completed.

    Args:
        audio_input_cfg: Audio input settings (provides the device index).
        wyoming_asr_cfg: Wyoming connection settings and prompt configuration.
        logger: Logger instance.
        stop_event: Event that ends the recording.
        live: Rich Live display passed to the sender.
        quiet: Passed through to the client context and the sender.
        save_recording: If True, the sender also saves the audio to disk.
        chunk_callback: Called with each partial transcript text.
        final_callback: Called with the final transcript text.
        extra_instructions: Extra text merged into the effective prompt.
        **_kwargs: Ignored; accepted so all transcribers share a call shape.

    Returns:
        The transcript returned by the receiver, or None when the connection
        was refused or any other error occurred.

    """
    try:
        async with wyoming_client_context(
            wyoming_asr_cfg.asr_wyoming_ip,
            wyoming_asr_cfg.asr_wyoming_port,
            "ASR",
            logger,
            quiet=quiet,
        ) as client:
            effective_prompt = wyoming_asr_cfg.get_effective_prompt(extra_instructions)
            if effective_prompt:
                # Only the first 50 characters are logged to keep the line short.
                logger.info("Using initial_prompt for Wyoming ASR: %s...", effective_prompt[:50])

            stream_config = setup_input_stream(audio_input_cfg.input_device_index)
            with open_audio_stream(stream_config) as stream:
                _, recv_task = await manage_send_receive_tasks(
                    _send_audio(
                        client,
                        stream,
                        stop_event,
                        logger,
                        live=live,
                        quiet=quiet,
                        save_recording=save_recording,
                        initial_prompt=effective_prompt,
                    ),
                    _receive_transcript(
                        client,
                        logger,
                        chunk_callback=chunk_callback,
                        final_callback=final_callback,
                    ),
                    return_when=asyncio.ALL_COMPLETED,
                )
                return recv_task.result()
    except ConnectionRefusedError:
        logger.warning("Failed to connect to Wyoming ASR server")
        return None
    except Exception:
        # Best-effort boundary: callers still get None, but failures that are
        # not a refused connection (audio device errors, protocol errors, ...)
        # are no longer reported as one and keep their traceback in the log.
        logger.exception("Error during Wyoming ASR transcription")
        return None
467
+
468
+
469
async def _transcribe_live_audio_buffered(
    *,
    audio_input_cfg: config.AudioInput,
    transcribe_fn: Callable[..., Awaitable[str]],
    transcribe_cfg: config.OpenAIASR | config.GeminiASR,
    provider_name: str,
    logger: logging.Logger,
    stop_event: InteractiveStopEvent,
    live: Live,
    quiet: bool = False,
    save_recording: bool = True,
    extra_instructions: str | None = None,
    **_kwargs: object,
) -> str | None:
    """Record the whole utterance first, then hand it to a transcription function.

    Used for providers (OpenAI, Gemini) that don't support streaming transcription.

    Args:
        audio_input_cfg: Audio input settings (provides the device index).
        transcribe_fn: Coroutine function called with the recorded audio,
            ``transcribe_cfg``, the logger and ``extra_instructions``.
        transcribe_cfg: Provider configuration forwarded to ``transcribe_fn``.
        provider_name: Provider label used in the error log message.
        logger: Logger instance.
        stop_event: Event that ends the recording.
        live: Rich Live display for recording progress.
        quiet: If True, suppress console output while recording.
        save_recording: If True, save the recording to disk.
        extra_instructions: Forwarded to ``transcribe_fn``.
        **_kwargs: Ignored; accepted so all transcribers share a call shape.

    Returns:
        The transcript, None when nothing was recorded, or an empty string
        when ``transcribe_fn`` raised.

    """
    recording = await record_audio_with_manual_stop(
        audio_input_cfg.input_device_index,
        stop_event,
        logger,
        quiet=quiet,
        live=live,
        save_recording=save_recording,
    )

    if recording:
        try:
            return await transcribe_fn(
                recording,
                transcribe_cfg,
                logger,
                extra_instructions=extra_instructions,
            )
        except Exception:
            logger.exception("Error during %s transcription", provider_name)
            return ""

    # Nothing was captured, so there is nothing to transcribe.
    return None