agent-cli 0.70.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_cli/__init__.py +5 -0
- agent_cli/__main__.py +6 -0
- agent_cli/_extras.json +14 -0
- agent_cli/_requirements/.gitkeep +0 -0
- agent_cli/_requirements/audio.txt +79 -0
- agent_cli/_requirements/faster-whisper.txt +215 -0
- agent_cli/_requirements/kokoro.txt +425 -0
- agent_cli/_requirements/llm.txt +183 -0
- agent_cli/_requirements/memory.txt +355 -0
- agent_cli/_requirements/mlx-whisper.txt +222 -0
- agent_cli/_requirements/piper.txt +176 -0
- agent_cli/_requirements/rag.txt +402 -0
- agent_cli/_requirements/server.txt +154 -0
- agent_cli/_requirements/speed.txt +77 -0
- agent_cli/_requirements/vad.txt +155 -0
- agent_cli/_requirements/wyoming.txt +71 -0
- agent_cli/_tools.py +368 -0
- agent_cli/agents/__init__.py +23 -0
- agent_cli/agents/_voice_agent_common.py +136 -0
- agent_cli/agents/assistant.py +383 -0
- agent_cli/agents/autocorrect.py +284 -0
- agent_cli/agents/chat.py +496 -0
- agent_cli/agents/memory/__init__.py +31 -0
- agent_cli/agents/memory/add.py +190 -0
- agent_cli/agents/memory/proxy.py +160 -0
- agent_cli/agents/rag_proxy.py +128 -0
- agent_cli/agents/speak.py +209 -0
- agent_cli/agents/transcribe.py +671 -0
- agent_cli/agents/transcribe_daemon.py +499 -0
- agent_cli/agents/voice_edit.py +291 -0
- agent_cli/api.py +22 -0
- agent_cli/cli.py +106 -0
- agent_cli/config.py +503 -0
- agent_cli/config_cmd.py +307 -0
- agent_cli/constants.py +27 -0
- agent_cli/core/__init__.py +1 -0
- agent_cli/core/audio.py +461 -0
- agent_cli/core/audio_format.py +299 -0
- agent_cli/core/chroma.py +88 -0
- agent_cli/core/deps.py +191 -0
- agent_cli/core/openai_proxy.py +139 -0
- agent_cli/core/process.py +195 -0
- agent_cli/core/reranker.py +120 -0
- agent_cli/core/sse.py +87 -0
- agent_cli/core/transcription_logger.py +70 -0
- agent_cli/core/utils.py +526 -0
- agent_cli/core/vad.py +175 -0
- agent_cli/core/watch.py +65 -0
- agent_cli/dev/__init__.py +14 -0
- agent_cli/dev/cli.py +1588 -0
- agent_cli/dev/coding_agents/__init__.py +19 -0
- agent_cli/dev/coding_agents/aider.py +24 -0
- agent_cli/dev/coding_agents/base.py +167 -0
- agent_cli/dev/coding_agents/claude.py +39 -0
- agent_cli/dev/coding_agents/codex.py +24 -0
- agent_cli/dev/coding_agents/continue_dev.py +15 -0
- agent_cli/dev/coding_agents/copilot.py +24 -0
- agent_cli/dev/coding_agents/cursor_agent.py +48 -0
- agent_cli/dev/coding_agents/gemini.py +28 -0
- agent_cli/dev/coding_agents/opencode.py +15 -0
- agent_cli/dev/coding_agents/registry.py +49 -0
- agent_cli/dev/editors/__init__.py +19 -0
- agent_cli/dev/editors/base.py +89 -0
- agent_cli/dev/editors/cursor.py +15 -0
- agent_cli/dev/editors/emacs.py +46 -0
- agent_cli/dev/editors/jetbrains.py +56 -0
- agent_cli/dev/editors/nano.py +31 -0
- agent_cli/dev/editors/neovim.py +33 -0
- agent_cli/dev/editors/registry.py +59 -0
- agent_cli/dev/editors/sublime.py +20 -0
- agent_cli/dev/editors/vim.py +42 -0
- agent_cli/dev/editors/vscode.py +15 -0
- agent_cli/dev/editors/zed.py +20 -0
- agent_cli/dev/project.py +568 -0
- agent_cli/dev/registry.py +52 -0
- agent_cli/dev/skill/SKILL.md +141 -0
- agent_cli/dev/skill/examples.md +571 -0
- agent_cli/dev/terminals/__init__.py +19 -0
- agent_cli/dev/terminals/apple_terminal.py +82 -0
- agent_cli/dev/terminals/base.py +56 -0
- agent_cli/dev/terminals/gnome.py +51 -0
- agent_cli/dev/terminals/iterm2.py +84 -0
- agent_cli/dev/terminals/kitty.py +77 -0
- agent_cli/dev/terminals/registry.py +48 -0
- agent_cli/dev/terminals/tmux.py +58 -0
- agent_cli/dev/terminals/warp.py +132 -0
- agent_cli/dev/terminals/zellij.py +78 -0
- agent_cli/dev/worktree.py +856 -0
- agent_cli/docs_gen.py +417 -0
- agent_cli/example-config.toml +185 -0
- agent_cli/install/__init__.py +5 -0
- agent_cli/install/common.py +89 -0
- agent_cli/install/extras.py +174 -0
- agent_cli/install/hotkeys.py +48 -0
- agent_cli/install/services.py +87 -0
- agent_cli/memory/__init__.py +7 -0
- agent_cli/memory/_files.py +250 -0
- agent_cli/memory/_filters.py +63 -0
- agent_cli/memory/_git.py +157 -0
- agent_cli/memory/_indexer.py +142 -0
- agent_cli/memory/_ingest.py +408 -0
- agent_cli/memory/_persistence.py +182 -0
- agent_cli/memory/_prompt.py +91 -0
- agent_cli/memory/_retrieval.py +294 -0
- agent_cli/memory/_store.py +169 -0
- agent_cli/memory/_streaming.py +44 -0
- agent_cli/memory/_tasks.py +48 -0
- agent_cli/memory/api.py +113 -0
- agent_cli/memory/client.py +272 -0
- agent_cli/memory/engine.py +361 -0
- agent_cli/memory/entities.py +43 -0
- agent_cli/memory/models.py +112 -0
- agent_cli/opts.py +433 -0
- agent_cli/py.typed +0 -0
- agent_cli/rag/__init__.py +3 -0
- agent_cli/rag/_indexer.py +67 -0
- agent_cli/rag/_indexing.py +226 -0
- agent_cli/rag/_prompt.py +30 -0
- agent_cli/rag/_retriever.py +156 -0
- agent_cli/rag/_store.py +48 -0
- agent_cli/rag/_utils.py +218 -0
- agent_cli/rag/api.py +175 -0
- agent_cli/rag/client.py +299 -0
- agent_cli/rag/engine.py +302 -0
- agent_cli/rag/models.py +55 -0
- agent_cli/scripts/.runtime/.gitkeep +0 -0
- agent_cli/scripts/__init__.py +1 -0
- agent_cli/scripts/check_plugin_skill_sync.py +50 -0
- agent_cli/scripts/linux-hotkeys/README.md +63 -0
- agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh +45 -0
- agent_cli/scripts/linux-hotkeys/toggle-transcription.sh +58 -0
- agent_cli/scripts/linux-hotkeys/toggle-voice-edit.sh +58 -0
- agent_cli/scripts/macos-hotkeys/README.md +45 -0
- agent_cli/scripts/macos-hotkeys/skhd-config-example +5 -0
- agent_cli/scripts/macos-hotkeys/toggle-autocorrect.sh +12 -0
- agent_cli/scripts/macos-hotkeys/toggle-transcription.sh +37 -0
- agent_cli/scripts/macos-hotkeys/toggle-voice-edit.sh +37 -0
- agent_cli/scripts/nvidia-asr-server/README.md +99 -0
- agent_cli/scripts/nvidia-asr-server/pyproject.toml +27 -0
- agent_cli/scripts/nvidia-asr-server/server.py +255 -0
- agent_cli/scripts/nvidia-asr-server/shell.nix +32 -0
- agent_cli/scripts/nvidia-asr-server/uv.lock +4654 -0
- agent_cli/scripts/run-openwakeword.sh +11 -0
- agent_cli/scripts/run-piper-windows.ps1 +30 -0
- agent_cli/scripts/run-piper.sh +24 -0
- agent_cli/scripts/run-whisper-linux.sh +40 -0
- agent_cli/scripts/run-whisper-macos.sh +6 -0
- agent_cli/scripts/run-whisper-windows.ps1 +51 -0
- agent_cli/scripts/run-whisper.sh +9 -0
- agent_cli/scripts/run_faster_whisper_server.py +136 -0
- agent_cli/scripts/setup-linux-hotkeys.sh +72 -0
- agent_cli/scripts/setup-linux.sh +108 -0
- agent_cli/scripts/setup-macos-hotkeys.sh +61 -0
- agent_cli/scripts/setup-macos.sh +76 -0
- agent_cli/scripts/setup-windows.ps1 +63 -0
- agent_cli/scripts/start-all-services-windows.ps1 +53 -0
- agent_cli/scripts/start-all-services.sh +178 -0
- agent_cli/scripts/sync_extras.py +138 -0
- agent_cli/server/__init__.py +3 -0
- agent_cli/server/cli.py +721 -0
- agent_cli/server/common.py +222 -0
- agent_cli/server/model_manager.py +288 -0
- agent_cli/server/model_registry.py +225 -0
- agent_cli/server/proxy/__init__.py +3 -0
- agent_cli/server/proxy/api.py +444 -0
- agent_cli/server/streaming.py +67 -0
- agent_cli/server/tts/__init__.py +3 -0
- agent_cli/server/tts/api.py +335 -0
- agent_cli/server/tts/backends/__init__.py +82 -0
- agent_cli/server/tts/backends/base.py +139 -0
- agent_cli/server/tts/backends/kokoro.py +403 -0
- agent_cli/server/tts/backends/piper.py +253 -0
- agent_cli/server/tts/model_manager.py +201 -0
- agent_cli/server/tts/model_registry.py +28 -0
- agent_cli/server/tts/wyoming_handler.py +249 -0
- agent_cli/server/whisper/__init__.py +3 -0
- agent_cli/server/whisper/api.py +413 -0
- agent_cli/server/whisper/backends/__init__.py +89 -0
- agent_cli/server/whisper/backends/base.py +97 -0
- agent_cli/server/whisper/backends/faster_whisper.py +225 -0
- agent_cli/server/whisper/backends/mlx.py +270 -0
- agent_cli/server/whisper/languages.py +116 -0
- agent_cli/server/whisper/model_manager.py +157 -0
- agent_cli/server/whisper/model_registry.py +28 -0
- agent_cli/server/whisper/wyoming_handler.py +203 -0
- agent_cli/services/__init__.py +343 -0
- agent_cli/services/_wyoming_utils.py +64 -0
- agent_cli/services/asr.py +506 -0
- agent_cli/services/llm.py +228 -0
- agent_cli/services/tts.py +450 -0
- agent_cli/services/wake_word.py +142 -0
- agent_cli-0.70.5.dist-info/METADATA +2118 -0
- agent_cli-0.70.5.dist-info/RECORD +196 -0
- agent_cli-0.70.5.dist-info/WHEEL +4 -0
- agent_cli-0.70.5.dist-info/entry_points.txt +4 -0
- agent_cli-0.70.5.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,506 @@
|
|
|
1
|
+
"""Module for Automatic Speech Recognition using Wyoming or OpenAI."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
import io
|
|
7
|
+
import wave
|
|
8
|
+
from datetime import UTC, datetime
|
|
9
|
+
from functools import partial
|
|
10
|
+
from pathlib import Path
|
|
11
|
+
from typing import TYPE_CHECKING
|
|
12
|
+
|
|
13
|
+
from agent_cli import constants
|
|
14
|
+
from agent_cli.core.audio import (
|
|
15
|
+
open_audio_stream,
|
|
16
|
+
read_audio_stream,
|
|
17
|
+
read_from_queue,
|
|
18
|
+
setup_input_stream,
|
|
19
|
+
)
|
|
20
|
+
from agent_cli.core.audio_format import check_ffmpeg_available, convert_audio_to_wyoming_format
|
|
21
|
+
from agent_cli.core.utils import manage_send_receive_tasks
|
|
22
|
+
from agent_cli.services import (
|
|
23
|
+
transcribe_audio_gemini,
|
|
24
|
+
transcribe_audio_openai,
|
|
25
|
+
)
|
|
26
|
+
from agent_cli.services._wyoming_utils import wyoming_client_context
|
|
27
|
+
|
|
28
|
+
if TYPE_CHECKING:
|
|
29
|
+
import logging
|
|
30
|
+
from collections.abc import Awaitable, Callable
|
|
31
|
+
|
|
32
|
+
import sounddevice as sd
|
|
33
|
+
from rich.live import Live
|
|
34
|
+
from wyoming.client import AsyncClient
|
|
35
|
+
|
|
36
|
+
from agent_cli import config
|
|
37
|
+
from agent_cli.core.utils import InteractiveStopEvent
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _get_transcriptions_dir() -> Path:
    """Return the folder that holds saved transcription recordings.

    The folder is ``~/.config/agent-cli/transcriptions``. It is created on
    every call (missing parents included, no error if it already exists), so
    the returned path can be written to immediately.
    """
    recordings_root = Path.home().joinpath(".config", "agent-cli", "transcriptions")
    recordings_root.mkdir(parents=True, exist_ok=True)
    return recordings_root
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _save_audio_to_file(audio_data: bytes, logger: logging.Logger) -> Path | None:
    """Write audio frames to a timestamped WAV file in the transcriptions folder.

    The file is named ``recording_<UTC timestamp>.wav`` with millisecond
    resolution. The WAV header takes its channel count, sample width and
    frame rate from ``constants``.

    Args:
        audio_data: Frames written verbatim after the WAV header.
        logger: Gets an info record on success and the traceback on failure.

    Returns:
        The path of the file that was written, or None if an ``OSError``
        prevented saving.

    """
    try:
        # "%f" is microseconds; trimming the last three digits leaves milliseconds.
        stamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S_%f")[:-3]
        target = _get_transcriptions_dir() / f"recording_{stamp}.wav"

        with wave.open(str(target), "wb") as writer:
            writer.setnchannels(constants.AUDIO_CHANNELS)
            writer.setsampwidth(constants.AUDIO_FORMAT_WIDTH)  # sample width in bytes
            writer.setframerate(constants.AUDIO_RATE)
            writer.writeframes(audio_data)

        logger.info("Saved audio recording to %s", target)
    except OSError:
        logger.exception("Failed to save audio recording")
        return None
    return target
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
def get_last_recording(index: int = 1) -> Path | None:
    """Look up one of the most recently saved recordings.

    Candidates are the ``recording_*.wav`` files in the transcriptions folder,
    sorted by path, and ``index`` counts back from the end of that ordering.

    Args:
        index: 1 selects the last file in sorted order (the most recent
            recording), 2 the one before it, and so on. Defaults to 1.

    Returns:
        Path to the selected recording, or None when ``index`` is below 1 or
        fewer than ``index`` recordings exist.

    """
    if index < 1:
        return None

    recordings = sorted(_get_transcriptions_dir().glob("recording_*.wav"))
    # Negative indexing from the end: -1 is the last entry, -2 the one before, ...
    return recordings[-index] if index <= len(recordings) else None
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _load_raw_audio(filepath: Path, logger: logging.Logger) -> bytes | None:
    """Read a file's bytes exactly as stored, without decoding or conversion.

    Args:
        filepath: File to read.
        logger: Gets an info record with the byte count, or the traceback on failure.

    Returns:
        The file contents, or None if reading raised ``OSError``.

    """
    try:
        payload = filepath.read_bytes()
        logger.info("Loaded raw audio from %s (%d bytes)", filepath, len(payload))
    except OSError:
        logger.exception("Failed to read audio file %s", filepath)
        return None
    return payload
|
|
102
|
+
|
|
103
|
+
|
|
104
|
+
def _load_wav_pcm(filepath: Path, logger: logging.Logger) -> bytes | None:
    """Extract all PCM frames from a WAV file.

    Args:
        filepath: WAV file to read.
        logger: Gets an info record on success, or the traceback on failure.

    Returns:
        The frames as bytes, or None if the file could not be opened or parsed
        as WAV (missing, unreadable, empty, truncated, or not RIFF/WAVE).

    """
    try:
        with wave.open(str(filepath), "rb") as wav_file:
            audio_data = wav_file.readframes(wav_file.getnframes())
        logger.info("Loaded PCM audio from %s", filepath)
        return audio_data
    except (OSError, EOFError, wave.Error):
        # The wave module signals an empty file or one cut off inside the header
        # with EOFError rather than wave.Error, so it must be listed explicitly.
        logger.exception("Failed to load audio from %s", filepath)
        return None
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
def load_audio_from_file(
    filepath: Path,
    logger: logging.Logger,
    *,
    convert_to_pcm: bool = True,
) -> bytes | None:
    """Read an audio file, decoding it to PCM unless raw bytes are requested.

    Three paths, tried in order:

    1. ``convert_to_pcm=False``: the file's bytes are returned untouched.
    2. ``.wav`` suffix (case-insensitive): PCM frames are read from the WAV file.
    3. Anything else (mp3, m4a, ogg, flac, ...): converted to PCM with ffmpeg.

    Args:
        filepath: Path to the audio file.
        logger: Logger instance.
        convert_to_pcm: If True, return PCM data. If False, return the raw file
            bytes (for APIs that handle conversion themselves).

    Returns:
        Audio data as bytes, or None if loading failed or ffmpeg is unavailable
        for a non-WAV file.

    """
    if not convert_to_pcm:
        # The caller will decode the container itself.
        return _load_raw_audio(filepath, logger)

    is_wav = filepath.suffix.lower() == ".wav"
    if is_wav:
        # WAV already wraps PCM; no external tool is needed.
        return _load_wav_pcm(filepath, logger)

    if not check_ffmpeg_available():
        logger.error("ffmpeg not found. Please install ffmpeg to transcribe non-WAV audio files.")
        return None

    try:
        converted = convert_audio_to_wyoming_format(filepath.read_bytes(), filepath.name)
        logger.info("Converted %s to PCM using ffmpeg", filepath)
    except (OSError, RuntimeError):
        logger.exception("Failed to convert %s", filepath)
        return None
    return converted
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def create_transcriber(
    provider_cfg: config.ProviderSelection,
    audio_input_cfg: config.AudioInput,
    wyoming_asr_cfg: config.WyomingASR,
    openai_asr_cfg: config.OpenAIASR,
    gemini_asr_cfg: config.GeminiASR | None = None,
) -> Callable[..., Awaitable[str | None]]:
    """Build the live-audio transcriber for the configured ASR provider.

    Args:
        provider_cfg: Its ``asr_provider`` selects "wyoming", "openai" or "gemini".
        audio_input_cfg: Bound into every returned transcriber.
        wyoming_asr_cfg: Bound only for the "wyoming" provider.
        openai_asr_cfg: Bound only for the "openai" provider.
        gemini_asr_cfg: Bound only for the "gemini" provider; required there.

    Returns:
        A ``functools.partial`` with the provider-specific configuration
        pre-bound; the remaining arguments are supplied by the caller.

    Raises:
        ValueError: If the provider is unsupported, or "gemini" is selected
            without ``gemini_asr_cfg``.

    """
    selected = provider_cfg.asr_provider

    if selected == "wyoming":
        # Wyoming has streaming support, uses its own implementation
        return partial(
            _transcribe_live_audio_wyoming,
            audio_input_cfg=audio_input_cfg,
            wyoming_asr_cfg=wyoming_asr_cfg,
        )

    # OpenAI and Gemini use the buffered record-then-transcribe pattern;
    # only the backend function, its config and the display name differ.
    if selected == "openai":
        backend_fn = transcribe_audio_openai
        backend_cfg = openai_asr_cfg
        backend_label = "OpenAI"
    elif selected == "gemini":
        if gemini_asr_cfg is None:
            msg = "Gemini ASR config is required when using gemini provider"
            raise ValueError(msg)
        backend_fn = transcribe_audio_gemini
        backend_cfg = gemini_asr_cfg
        backend_label = "Gemini"
    else:
        msg = f"Unsupported ASR provider: {provider_cfg.asr_provider}"
        raise ValueError(msg)

    return partial(
        _transcribe_live_audio_buffered,
        audio_input_cfg=audio_input_cfg,
        transcribe_fn=backend_fn,
        transcribe_cfg=backend_cfg,
        provider_name=backend_label,
    )
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
def create_recorded_audio_transcriber(
    provider_cfg: config.ProviderSelection,
) -> Callable[..., Awaitable[str]]:
    """Pick the transcriber for already-recorded audio.

    Args:
        provider_cfg: Its ``asr_provider`` selects "openai", "wyoming" or "gemini".

    Returns:
        The provider's transcription coroutine function, returned unbound.

    Raises:
        ValueError: If the provider is not one of the three supported names.

    """
    selected = provider_cfg.asr_provider
    if selected == "openai":
        transcriber = transcribe_audio_openai
    elif selected == "wyoming":
        transcriber = _transcribe_recorded_audio_wyoming
    elif selected == "gemini":
        transcriber = transcribe_audio_gemini
    else:
        msg = f"Unsupported ASR provider: {provider_cfg.asr_provider}"
        raise ValueError(msg)
    return transcriber
|
|
211
|
+
|
|
212
|
+
|
|
213
|
+
async def _send_audio(
    client: AsyncClient,
    stream: sd.InputStream,
    stop_event: InteractiveStopEvent,
    logger: logging.Logger,
    *,
    live: Live,
    quiet: bool = False,
    save_recording: bool = True,
    initial_prompt: str | None = None,
) -> None:
    """Forward audio chunks from the input stream to the Wyoming server.

    Event order on the wire: ``Transcribe`` (carrying ``initial_prompt`` in
    its context when one is given), ``AudioStart``, one ``AudioChunk`` per
    chunk handed over by ``read_audio_stream``, then ``AudioStop``.

    Args:
        client: Connected Wyoming client used for every ``write_event``.
        stream: Input stream passed through to ``read_audio_stream``.
        stop_event: Passed through to ``read_audio_stream``.
        logger: Logger instance.
        live: Passed through to ``read_audio_stream`` for progress display.
        quiet: Passed through to ``read_audio_stream``.
        save_recording: If True, keep a copy of every chunk and save it as a
            WAV file once streaming has finished.
        initial_prompt: Optional prompt sent as ``context["initial_prompt"]``.

    """
    from wyoming.asr import Transcribe  # noqa: PLC0415
    from wyoming.audio import AudioChunk, AudioStart, AudioStop  # noqa: PLC0415

    # An empty or missing prompt means "no context" rather than an empty dict.
    transcribe_context = None
    if initial_prompt:
        transcribe_context = {"initial_prompt": initial_prompt}
    await client.write_event(Transcribe(context=transcribe_context).event())
    await client.write_event(AudioStart(**constants.WYOMING_AUDIO_CONFIG).event())

    # Only allocate the copy buffer when the recording will actually be saved.
    recorded = io.BytesIO() if save_recording else None

    async def forward_chunk(chunk: bytes) -> None:
        """Keep a copy of the chunk if recording, then send it to the server."""
        if recorded is not None:
            recorded.write(chunk)
        await client.write_event(AudioChunk(audio=chunk, **constants.WYOMING_AUDIO_CONFIG).event())

    try:
        await read_audio_stream(
            stream=stream,
            stop_event=stop_event,
            chunk_handler=forward_chunk,
            logger=logger,
            live=live,
            quiet=quiet,
            progress_message="Listening",
            progress_style="blue",
        )
    finally:
        # Sent even when reading fails, so the server sees the end of the audio.
        await client.write_event(AudioStop().event())
        logger.debug("Sent AudioStop")

    if recorded is None:
        return

    # Nothing is written to disk for an empty capture.
    captured = recorded.getvalue()
    if captured:
        _save_audio_to_file(captured, logger)
|
|
262
|
+
|
|
263
|
+
|
|
264
|
+
async def record_audio_to_buffer(queue: asyncio.Queue, logger: logging.Logger) -> bytes:
    """Collect the audio chunks delivered through ``queue`` into one bytes object.

    Chunks are appended in the order ``read_from_queue`` hands them to the
    handler; when reading stops is decided by ``read_from_queue``.

    Args:
        queue: Source of audio chunks, consumed via ``read_from_queue``.
        logger: Logger instance passed to ``read_from_queue``.

    Returns:
        The concatenation of every chunk received.

    """
    collected = io.BytesIO()

    def append_chunk(chunk: bytes) -> None:
        """Append one chunk to the in-memory buffer."""
        collected.write(chunk)

    await read_from_queue(queue=queue, chunk_handler=append_chunk, logger=logger)
    return collected.getvalue()
|
|
275
|
+
|
|
276
|
+
|
|
277
|
+
async def _receive_transcript(
    client: AsyncClient,
    logger: logging.Logger,
    *,
    chunk_callback: Callable[[str], None] | None = None,
    final_callback: Callable[[str], None] | None = None,
) -> str:
    """Read events from the ASR server until the final transcript arrives.

    Args:
        client: Connected Wyoming client to read events from.
        logger: Logger instance.
        chunk_callback: Called with the text of each ``TranscriptChunk`` event.
        final_callback: Called once with the text of the ``Transcript`` event.

    Returns:
        The text of the ``Transcript`` event, or an empty string if
        ``read_event`` returned None (connection lost) before one arrived.

    """
    from wyoming.asr import (  # noqa: PLC0415
        Transcript,
        TranscriptChunk,
        TranscriptStart,
        TranscriptStop,
    )

    while True:
        event = await client.read_event()
        if event is None:
            logger.warning("Connection to ASR server lost.")
            return ""

        if Transcript.is_type(event.type):
            # The final transcript ends the exchange.
            final_text = Transcript.from_event(event).text
            logger.info("Final transcript: %s", final_text)
            if final_callback:
                final_callback(final_text)
            return final_text

        if TranscriptChunk.is_type(event.type):
            # Partial result: report it and keep listening.
            piece = TranscriptChunk.from_event(event)
            logger.debug("Transcript chunk: %s", piece.text)
            if chunk_callback:
                chunk_callback(piece.text)
            continue

        if TranscriptStart.is_type(event.type) or TranscriptStop.is_type(event.type):
            logger.debug("Received %s", event.type)
            continue

        logger.debug("Ignoring event type: %s", event.type)
|
|
317
|
+
|
|
318
|
+
|
|
319
|
+
async def record_audio_with_manual_stop(
    input_device_index: int | None,
    stop_event: InteractiveStopEvent,
    logger: logging.Logger,
    *,
    quiet: bool = False,
    live: Live | None = None,
    save_recording: bool = True,
) -> bytes:
    """Capture audio from an input device into memory.

    An input stream is opened for the given device and every chunk that
    ``read_audio_stream`` delivers is appended to a buffer; ``stop_event`` is
    handed to ``read_audio_stream``, which governs when reading ends.

    Args:
        input_device_index: Audio input device index
        stop_event: Event to stop recording
        logger: Logger instance
        quiet: If True, suppress console output
        live: Rich Live display for progress
        save_recording: If True, also save a non-empty recording to disk

    Returns:
        The recorded audio data as bytes (possibly empty)

    """
    captured = io.BytesIO()

    def append_chunk(chunk: bytes) -> None:
        """Append one chunk to the in-memory recording."""
        captured.write(chunk)

    with open_audio_stream(setup_input_stream(input_device_index)) as stream:
        await read_audio_stream(
            stream=stream,
            stop_event=stop_event,
            chunk_handler=append_chunk,
            logger=logger,
            live=live,
            quiet=quiet,
            progress_message="Recording",
            progress_style="green",
        )

    recording = captured.getvalue()

    # An empty capture is never written to disk.
    if recording and save_recording:
        _save_audio_to_file(recording, logger)

    return recording
|
|
368
|
+
|
|
369
|
+
|
|
370
|
+
async def _transcribe_recorded_audio_wyoming(
    *,
    audio_data: bytes,
    wyoming_asr_cfg: config.WyomingASR,
    logger: logging.Logger,
    quiet: bool = False,
    extra_instructions: str | None = None,
    **_kwargs: object,
) -> str:
    """Process pre-recorded audio data with Wyoming ASR server.

    Sends ``Transcribe`` (with the effective prompt as
    ``context["initial_prompt"]`` when there is one), ``AudioStart``, the audio
    split into fixed-size ``AudioChunk`` events, and ``AudioStop``, then waits
    for the transcript.

    Args:
        audio_data: Audio bytes to send, described to the server by
            ``constants.WYOMING_AUDIO_CONFIG``.
        wyoming_asr_cfg: Provides the server address and the effective prompt.
        logger: Logger instance.
        quiet: Passed to ``wyoming_client_context``.
        extra_instructions: Passed to ``get_effective_prompt``.
        **_kwargs: Accepted and ignored so all transcribers share a call shape.

    Returns:
        The transcript text, or an empty string if anything failed. This
        function is best-effort and does not propagate ``Exception``.

    """
    from wyoming.asr import Transcribe  # noqa: PLC0415
    from wyoming.audio import AudioChunk, AudioStart, AudioStop  # noqa: PLC0415

    try:
        async with wyoming_client_context(
            wyoming_asr_cfg.asr_wyoming_ip,
            wyoming_asr_cfg.asr_wyoming_port,
            "ASR",
            logger,
            quiet=quiet,
        ) as client:
            # Get effective prompt and pass via context
            effective_prompt = wyoming_asr_cfg.get_effective_prompt(extra_instructions)
            context = {"initial_prompt": effective_prompt} if effective_prompt else None
            await client.write_event(Transcribe(context=context).event())
            await client.write_event(AudioStart(**constants.WYOMING_AUDIO_CONFIG).event())

            chunk_size = constants.AUDIO_CHUNK_SIZE * 2
            for i in range(0, len(audio_data), chunk_size):
                chunk = audio_data[i : i + chunk_size]
                await client.write_event(
                    AudioChunk(audio=chunk, **constants.WYOMING_AUDIO_CONFIG).event(),
                )
                logger.debug("Sent %d byte(s) of audio", len(chunk))

            await client.write_event(AudioStop().event())
            logger.debug("Sent AudioStop")

            return await _receive_transcript(client, logger)
    except ConnectionRefusedError:
        logger.warning("Failed to connect to Wyoming ASR server")
        return ""
    except Exception:
        # Still best-effort, but a failure that is not a refused connection
        # (protocol error, bad config, dropped socket, ...) must not be
        # reported as a connection problem, and its traceback is kept.
        logger.exception("Wyoming ASR transcription failed")
        return ""
|
|
412
|
+
|
|
413
|
+
|
|
414
|
+
async def _transcribe_live_audio_wyoming(
    *,
    audio_input_cfg: config.AudioInput,
    wyoming_asr_cfg: config.WyomingASR,
    logger: logging.Logger,
    stop_event: InteractiveStopEvent,
    live: Live,
    quiet: bool = False,
    save_recording: bool = True,
    chunk_callback: Callable[[str], None] | None = None,
    final_callback: Callable[[str], None] | None = None,
    extra_instructions: str | None = None,
    **_kwargs: object,
) -> str | None:
    """Stream live audio to a Wyoming ASR server and return its transcript.

    Connects to the server, opens the configured input device, and hands a
    sender (``_send_audio``) and a receiver (``_receive_transcript``) to
    ``manage_send_receive_tasks``; the receiver's result is returned.

    Args:
        audio_input_cfg: Provides ``input_device_index``.
        wyoming_asr_cfg: Provides the server address and the effective prompt.
        logger: Logger instance.
        stop_event: Passed to the sender.
        live: Rich Live display passed to the sender.
        quiet: Passed to ``wyoming_client_context`` and the sender.
        save_recording: If True, the sender also saves the audio to disk.
        chunk_callback: Passed to the receiver for partial transcripts.
        final_callback: Passed to the receiver for the final transcript.
        extra_instructions: Passed to ``get_effective_prompt``.
        **_kwargs: Accepted and ignored so all transcribers share a call shape.

    Returns:
        The transcript text, or None if anything failed. This function is
        best-effort and does not propagate ``Exception``.

    """
    try:
        async with wyoming_client_context(
            wyoming_asr_cfg.asr_wyoming_ip,
            wyoming_asr_cfg.asr_wyoming_port,
            "ASR",
            logger,
            quiet=quiet,
        ) as client:
            # Get effective prompt for Wyoming
            effective_prompt = wyoming_asr_cfg.get_effective_prompt(extra_instructions)
            if effective_prompt:
                # Only the first 50 characters are logged.
                logger.info("Using initial_prompt for Wyoming ASR: %s...", effective_prompt[:50])

            stream_config = setup_input_stream(audio_input_cfg.input_device_index)
            with open_audio_stream(stream_config) as stream:
                _, recv_task = await manage_send_receive_tasks(
                    _send_audio(
                        client,
                        stream,
                        stop_event,
                        logger,
                        live=live,
                        quiet=quiet,
                        save_recording=save_recording,
                        initial_prompt=effective_prompt,
                    ),
                    _receive_transcript(
                        client,
                        logger,
                        chunk_callback=chunk_callback,
                        final_callback=final_callback,
                    ),
                    return_when=asyncio.ALL_COMPLETED,
                )
                return recv_task.result()
    except ConnectionRefusedError:
        logger.warning("Failed to connect to Wyoming ASR server")
        return None
    except Exception:
        # Still best-effort, but a failure that is not a refused connection
        # (audio device error, protocol error, failed task, ...) must not be
        # reported as a connection problem, and its traceback is kept.
        logger.exception("Wyoming ASR live transcription failed")
        return None
|
|
467
|
+
|
|
468
|
+
|
|
469
|
+
async def _transcribe_live_audio_buffered(
    *,
    audio_input_cfg: config.AudioInput,
    transcribe_fn: Callable[..., Awaitable[str]],
    transcribe_cfg: config.OpenAIASR | config.GeminiASR,
    provider_name: str,
    logger: logging.Logger,
    stop_event: InteractiveStopEvent,
    live: Live,
    quiet: bool = False,
    save_recording: bool = True,
    extra_instructions: str | None = None,
    **_kwargs: object,
) -> str | None:
    """Record the whole utterance first, then transcribe it in one request.

    Used for providers (OpenAI, Gemini) that don't support streaming transcription.

    Args:
        audio_input_cfg: Provides ``input_device_index`` for the recording.
        transcribe_fn: Awaited as ``transcribe_fn(audio, transcribe_cfg, logger,
            extra_instructions=...)``.
        transcribe_cfg: Provider configuration forwarded to ``transcribe_fn``.
        provider_name: Display name used in the error log message.
        logger: Logger instance.
        stop_event: Passed to the recorder.
        live: Rich Live display passed to the recorder.
        quiet: Passed to the recorder.
        save_recording: If True, the recorder also saves the audio to disk.
        extra_instructions: Forwarded to ``transcribe_fn``.
        **_kwargs: Accepted and ignored so all transcribers share a call shape.

    Returns:
        The transcript; None when nothing was recorded; an empty string when
        ``transcribe_fn`` raised (the exception is logged, not propagated).

    """
    recorded = await record_audio_with_manual_stop(
        audio_input_cfg.input_device_index,
        stop_event,
        logger,
        quiet=quiet,
        live=live,
        save_recording=save_recording,
    )
    if not recorded:
        # Nothing captured, so there is nothing to send to the provider.
        return None

    try:
        transcript = await transcribe_fn(
            recorded,
            transcribe_cfg,
            logger,
            extra_instructions=extra_instructions,
        )
    except Exception:
        logger.exception("Error during %s transcription", provider_name)
        return ""
    return transcript
|