agent-cli 0.70.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (196) hide show
  1. agent_cli/__init__.py +5 -0
  2. agent_cli/__main__.py +6 -0
  3. agent_cli/_extras.json +14 -0
  4. agent_cli/_requirements/.gitkeep +0 -0
  5. agent_cli/_requirements/audio.txt +79 -0
  6. agent_cli/_requirements/faster-whisper.txt +215 -0
  7. agent_cli/_requirements/kokoro.txt +425 -0
  8. agent_cli/_requirements/llm.txt +183 -0
  9. agent_cli/_requirements/memory.txt +355 -0
  10. agent_cli/_requirements/mlx-whisper.txt +222 -0
  11. agent_cli/_requirements/piper.txt +176 -0
  12. agent_cli/_requirements/rag.txt +402 -0
  13. agent_cli/_requirements/server.txt +154 -0
  14. agent_cli/_requirements/speed.txt +77 -0
  15. agent_cli/_requirements/vad.txt +155 -0
  16. agent_cli/_requirements/wyoming.txt +71 -0
  17. agent_cli/_tools.py +368 -0
  18. agent_cli/agents/__init__.py +23 -0
  19. agent_cli/agents/_voice_agent_common.py +136 -0
  20. agent_cli/agents/assistant.py +383 -0
  21. agent_cli/agents/autocorrect.py +284 -0
  22. agent_cli/agents/chat.py +496 -0
  23. agent_cli/agents/memory/__init__.py +31 -0
  24. agent_cli/agents/memory/add.py +190 -0
  25. agent_cli/agents/memory/proxy.py +160 -0
  26. agent_cli/agents/rag_proxy.py +128 -0
  27. agent_cli/agents/speak.py +209 -0
  28. agent_cli/agents/transcribe.py +671 -0
  29. agent_cli/agents/transcribe_daemon.py +499 -0
  30. agent_cli/agents/voice_edit.py +291 -0
  31. agent_cli/api.py +22 -0
  32. agent_cli/cli.py +106 -0
  33. agent_cli/config.py +503 -0
  34. agent_cli/config_cmd.py +307 -0
  35. agent_cli/constants.py +27 -0
  36. agent_cli/core/__init__.py +1 -0
  37. agent_cli/core/audio.py +461 -0
  38. agent_cli/core/audio_format.py +299 -0
  39. agent_cli/core/chroma.py +88 -0
  40. agent_cli/core/deps.py +191 -0
  41. agent_cli/core/openai_proxy.py +139 -0
  42. agent_cli/core/process.py +195 -0
  43. agent_cli/core/reranker.py +120 -0
  44. agent_cli/core/sse.py +87 -0
  45. agent_cli/core/transcription_logger.py +70 -0
  46. agent_cli/core/utils.py +526 -0
  47. agent_cli/core/vad.py +175 -0
  48. agent_cli/core/watch.py +65 -0
  49. agent_cli/dev/__init__.py +14 -0
  50. agent_cli/dev/cli.py +1588 -0
  51. agent_cli/dev/coding_agents/__init__.py +19 -0
  52. agent_cli/dev/coding_agents/aider.py +24 -0
  53. agent_cli/dev/coding_agents/base.py +167 -0
  54. agent_cli/dev/coding_agents/claude.py +39 -0
  55. agent_cli/dev/coding_agents/codex.py +24 -0
  56. agent_cli/dev/coding_agents/continue_dev.py +15 -0
  57. agent_cli/dev/coding_agents/copilot.py +24 -0
  58. agent_cli/dev/coding_agents/cursor_agent.py +48 -0
  59. agent_cli/dev/coding_agents/gemini.py +28 -0
  60. agent_cli/dev/coding_agents/opencode.py +15 -0
  61. agent_cli/dev/coding_agents/registry.py +49 -0
  62. agent_cli/dev/editors/__init__.py +19 -0
  63. agent_cli/dev/editors/base.py +89 -0
  64. agent_cli/dev/editors/cursor.py +15 -0
  65. agent_cli/dev/editors/emacs.py +46 -0
  66. agent_cli/dev/editors/jetbrains.py +56 -0
  67. agent_cli/dev/editors/nano.py +31 -0
  68. agent_cli/dev/editors/neovim.py +33 -0
  69. agent_cli/dev/editors/registry.py +59 -0
  70. agent_cli/dev/editors/sublime.py +20 -0
  71. agent_cli/dev/editors/vim.py +42 -0
  72. agent_cli/dev/editors/vscode.py +15 -0
  73. agent_cli/dev/editors/zed.py +20 -0
  74. agent_cli/dev/project.py +568 -0
  75. agent_cli/dev/registry.py +52 -0
  76. agent_cli/dev/skill/SKILL.md +141 -0
  77. agent_cli/dev/skill/examples.md +571 -0
  78. agent_cli/dev/terminals/__init__.py +19 -0
  79. agent_cli/dev/terminals/apple_terminal.py +82 -0
  80. agent_cli/dev/terminals/base.py +56 -0
  81. agent_cli/dev/terminals/gnome.py +51 -0
  82. agent_cli/dev/terminals/iterm2.py +84 -0
  83. agent_cli/dev/terminals/kitty.py +77 -0
  84. agent_cli/dev/terminals/registry.py +48 -0
  85. agent_cli/dev/terminals/tmux.py +58 -0
  86. agent_cli/dev/terminals/warp.py +132 -0
  87. agent_cli/dev/terminals/zellij.py +78 -0
  88. agent_cli/dev/worktree.py +856 -0
  89. agent_cli/docs_gen.py +417 -0
  90. agent_cli/example-config.toml +185 -0
  91. agent_cli/install/__init__.py +5 -0
  92. agent_cli/install/common.py +89 -0
  93. agent_cli/install/extras.py +174 -0
  94. agent_cli/install/hotkeys.py +48 -0
  95. agent_cli/install/services.py +87 -0
  96. agent_cli/memory/__init__.py +7 -0
  97. agent_cli/memory/_files.py +250 -0
  98. agent_cli/memory/_filters.py +63 -0
  99. agent_cli/memory/_git.py +157 -0
  100. agent_cli/memory/_indexer.py +142 -0
  101. agent_cli/memory/_ingest.py +408 -0
  102. agent_cli/memory/_persistence.py +182 -0
  103. agent_cli/memory/_prompt.py +91 -0
  104. agent_cli/memory/_retrieval.py +294 -0
  105. agent_cli/memory/_store.py +169 -0
  106. agent_cli/memory/_streaming.py +44 -0
  107. agent_cli/memory/_tasks.py +48 -0
  108. agent_cli/memory/api.py +113 -0
  109. agent_cli/memory/client.py +272 -0
  110. agent_cli/memory/engine.py +361 -0
  111. agent_cli/memory/entities.py +43 -0
  112. agent_cli/memory/models.py +112 -0
  113. agent_cli/opts.py +433 -0
  114. agent_cli/py.typed +0 -0
  115. agent_cli/rag/__init__.py +3 -0
  116. agent_cli/rag/_indexer.py +67 -0
  117. agent_cli/rag/_indexing.py +226 -0
  118. agent_cli/rag/_prompt.py +30 -0
  119. agent_cli/rag/_retriever.py +156 -0
  120. agent_cli/rag/_store.py +48 -0
  121. agent_cli/rag/_utils.py +218 -0
  122. agent_cli/rag/api.py +175 -0
  123. agent_cli/rag/client.py +299 -0
  124. agent_cli/rag/engine.py +302 -0
  125. agent_cli/rag/models.py +55 -0
  126. agent_cli/scripts/.runtime/.gitkeep +0 -0
  127. agent_cli/scripts/__init__.py +1 -0
  128. agent_cli/scripts/check_plugin_skill_sync.py +50 -0
  129. agent_cli/scripts/linux-hotkeys/README.md +63 -0
  130. agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh +45 -0
  131. agent_cli/scripts/linux-hotkeys/toggle-transcription.sh +58 -0
  132. agent_cli/scripts/linux-hotkeys/toggle-voice-edit.sh +58 -0
  133. agent_cli/scripts/macos-hotkeys/README.md +45 -0
  134. agent_cli/scripts/macos-hotkeys/skhd-config-example +5 -0
  135. agent_cli/scripts/macos-hotkeys/toggle-autocorrect.sh +12 -0
  136. agent_cli/scripts/macos-hotkeys/toggle-transcription.sh +37 -0
  137. agent_cli/scripts/macos-hotkeys/toggle-voice-edit.sh +37 -0
  138. agent_cli/scripts/nvidia-asr-server/README.md +99 -0
  139. agent_cli/scripts/nvidia-asr-server/pyproject.toml +27 -0
  140. agent_cli/scripts/nvidia-asr-server/server.py +255 -0
  141. agent_cli/scripts/nvidia-asr-server/shell.nix +32 -0
  142. agent_cli/scripts/nvidia-asr-server/uv.lock +4654 -0
  143. agent_cli/scripts/run-openwakeword.sh +11 -0
  144. agent_cli/scripts/run-piper-windows.ps1 +30 -0
  145. agent_cli/scripts/run-piper.sh +24 -0
  146. agent_cli/scripts/run-whisper-linux.sh +40 -0
  147. agent_cli/scripts/run-whisper-macos.sh +6 -0
  148. agent_cli/scripts/run-whisper-windows.ps1 +51 -0
  149. agent_cli/scripts/run-whisper.sh +9 -0
  150. agent_cli/scripts/run_faster_whisper_server.py +136 -0
  151. agent_cli/scripts/setup-linux-hotkeys.sh +72 -0
  152. agent_cli/scripts/setup-linux.sh +108 -0
  153. agent_cli/scripts/setup-macos-hotkeys.sh +61 -0
  154. agent_cli/scripts/setup-macos.sh +76 -0
  155. agent_cli/scripts/setup-windows.ps1 +63 -0
  156. agent_cli/scripts/start-all-services-windows.ps1 +53 -0
  157. agent_cli/scripts/start-all-services.sh +178 -0
  158. agent_cli/scripts/sync_extras.py +138 -0
  159. agent_cli/server/__init__.py +3 -0
  160. agent_cli/server/cli.py +721 -0
  161. agent_cli/server/common.py +222 -0
  162. agent_cli/server/model_manager.py +288 -0
  163. agent_cli/server/model_registry.py +225 -0
  164. agent_cli/server/proxy/__init__.py +3 -0
  165. agent_cli/server/proxy/api.py +444 -0
  166. agent_cli/server/streaming.py +67 -0
  167. agent_cli/server/tts/__init__.py +3 -0
  168. agent_cli/server/tts/api.py +335 -0
  169. agent_cli/server/tts/backends/__init__.py +82 -0
  170. agent_cli/server/tts/backends/base.py +139 -0
  171. agent_cli/server/tts/backends/kokoro.py +403 -0
  172. agent_cli/server/tts/backends/piper.py +253 -0
  173. agent_cli/server/tts/model_manager.py +201 -0
  174. agent_cli/server/tts/model_registry.py +28 -0
  175. agent_cli/server/tts/wyoming_handler.py +249 -0
  176. agent_cli/server/whisper/__init__.py +3 -0
  177. agent_cli/server/whisper/api.py +413 -0
  178. agent_cli/server/whisper/backends/__init__.py +89 -0
  179. agent_cli/server/whisper/backends/base.py +97 -0
  180. agent_cli/server/whisper/backends/faster_whisper.py +225 -0
  181. agent_cli/server/whisper/backends/mlx.py +270 -0
  182. agent_cli/server/whisper/languages.py +116 -0
  183. agent_cli/server/whisper/model_manager.py +157 -0
  184. agent_cli/server/whisper/model_registry.py +28 -0
  185. agent_cli/server/whisper/wyoming_handler.py +203 -0
  186. agent_cli/services/__init__.py +343 -0
  187. agent_cli/services/_wyoming_utils.py +64 -0
  188. agent_cli/services/asr.py +506 -0
  189. agent_cli/services/llm.py +228 -0
  190. agent_cli/services/tts.py +450 -0
  191. agent_cli/services/wake_word.py +142 -0
  192. agent_cli-0.70.5.dist-info/METADATA +2118 -0
  193. agent_cli-0.70.5.dist-info/RECORD +196 -0
  194. agent_cli-0.70.5.dist-info/WHEEL +4 -0
  195. agent_cli-0.70.5.dist-info/entry_points.txt +4 -0
  196. agent_cli-0.70.5.dist-info/licenses/LICENSE +21 -0
@@ -0,0 +1,403 @@
1
+ """Kokoro TTS backend using PyTorch-based synthesis."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import io
7
+ import logging
8
+ import time
9
+ import wave
10
+ from concurrent.futures import ProcessPoolExecutor
11
+ from dataclasses import dataclass, field
12
+ from multiprocessing import Manager, get_context
13
+ from pathlib import Path
14
+ from typing import TYPE_CHECKING, Any
15
+
16
+ from agent_cli import constants
17
+ from agent_cli.core.process import set_process_title
18
+ from agent_cli.server.streaming import AsyncQueueReader, QueueWriter
19
+ from agent_cli.server.tts.backends.base import (
20
+ BackendConfig,
21
+ InvalidTextError,
22
+ SynthesisResult,
23
+ get_backend_cache_dir,
24
+ get_torch_device,
25
+ )
26
+
27
+ if TYPE_CHECKING:
28
+ from collections.abc import AsyncIterator
29
+
30
+ logger = logging.getLogger(__name__)
31
+
32
+ # HuggingFace repository for Kokoro model and voices
33
+ KOKORO_HF_REPO = "hexgrad/Kokoro-82M"
34
+
35
+ # Default voice if none specified
36
+ DEFAULT_VOICE = "af_heart"
37
+
38
+
39
+ # --- Subprocess state (only used within subprocess worker) ---
40
+ # This state persists across function calls within the subprocess because:
41
+ # 1. Model loading is expensive and must be reused across synthesis calls
42
+ # 2. PyTorch models cannot be pickled/passed through IPC queues
43
+ # 3. The subprocess is long-lived (ProcessPoolExecutor reuses workers)
44
+
45
+
46
@dataclass
class _SubprocessState:
    """Worker-local container for the loaded Kokoro model and its pipelines.

    Lives only inside the ProcessPoolExecutor worker process; it is never
    shared with the parent (PyTorch models cannot be pickled across IPC).
    """

    model: Any = None  # loaded KModel instance, or None before load
    device: str | None = None  # resolved device string ("cuda", "mps", "cpu")
    pipelines: dict[str, Any] = field(default_factory=dict)  # lang_code -> KPipeline


# Module-level singleton: persists between task submissions because the
# executor reuses the same long-lived worker process.
_state = _SubprocessState()
56
+
57
+
58
+ # --- Subprocess worker functions (run in isolated process) ---
59
+
60
+
61
def _hf_download(filename: str, local_dir: Path) -> Path:
    """Download *filename* from the Kokoro HuggingFace repo into *local_dir*.

    Returns the expected local path of the downloaded leaf file.
    """
    from huggingface_hub import hf_hub_download  # noqa: PLC0415

    local_dir.mkdir(parents=True, exist_ok=True)
    hf_hub_download(
        repo_id=KOKORO_HF_REPO,
        filename=filename,
        local_dir=local_dir,
    )
    # hf_hub_download mirrors the repo layout under local_dir; the caller
    # gets the path of the leaf file itself.
    return local_dir / Path(filename).name
68
+
69
+
70
def _ensure_model(cache_dir: Path) -> Path:
    """Return the local Kokoro checkpoint path, fetching files on first use.

    Both the model weights and their config.json are downloaded when absent.
    """
    model_dir = cache_dir / "model"
    checkpoint = model_dir / "kokoro-v1_0.pth"

    if not checkpoint.exists():
        logger.info("Downloading Kokoro model...")
        _hf_download("kokoro-v1_0.pth", model_dir)
    if not (model_dir / "config.json").exists():
        logger.info("Downloading Kokoro config...")
        _hf_download("config.json", model_dir)

    return checkpoint
84
+
85
+
86
def _ensure_voice(voice_name: str, cache_dir: Path) -> Path:
    """Return the local path of a voice tensor, downloading it on first use."""
    target = cache_dir / "voices" / f"{voice_name}.pt"
    if not target.exists():
        logger.info("Downloading voice '%s'...", voice_name)
        # The repo stores voices under "voices/" and hf_hub_download mirrors
        # that layout under local_dir, so the download root is cache_dir
        # itself (not cache_dir/voices).
        _hf_download(f"voices/{voice_name}.pt", cache_dir)
    return target
94
+
95
+
96
def _resolve_model_path(model_name: str, cache_dir: Path) -> Path:
    """Resolve *model_name* to a local checkpoint path.

    An existing ``.pth`` file path is used directly; anything else is treated
    as a request for the default HuggingFace checkpoint (downloaded if needed).
    """
    candidate = Path(model_name)
    if candidate.suffix == ".pth" and candidate.exists():
        return candidate

    return _ensure_model(cache_dir)
105
+
106
+
107
def _resolve_voice_path(voice: str | None, cache_dir: Path) -> tuple[str, str]:
    """Map a voice name (or explicit ``.pt`` path) to ``(path, lang_code)``.

    Kokoro convention: the first letter of the voice name encodes the
    language (a=American English, b=British English, etc.).
    """
    name = voice or DEFAULT_VOICE

    explicit = Path(name)
    if explicit.suffix == ".pt" and explicit.exists():
        return str(explicit), explicit.stem[0].lower()

    # Not a local file: treat as a repo voice name and fetch on demand.
    downloaded = _ensure_voice(name, cache_dir)
    return str(downloaded), name[0].lower()
120
+
121
+
122
def _get_pipeline(voice: str | None, cache_dir: str) -> tuple[Any, str]:
    """Return ``(pipeline, voice_path)`` for *voice*, building the pipeline lazily.

    Pipelines are cached per language code in the worker-local ``_state``,
    all sharing the single loaded model.
    """
    from kokoro import KPipeline  # noqa: PLC0415

    voice_path, lang_code = _resolve_voice_path(voice, Path(cache_dir))

    pipeline = _state.pipelines.get(lang_code)
    if pipeline is None:
        pipeline = KPipeline(
            lang_code=lang_code,
            model=_state.model,
            device=_state.device,
        )
        _state.pipelines[lang_code] = pipeline

    return pipeline, voice_path
137
+
138
+
139
def _load_model_in_subprocess(
    model_name: str,
    device: str,
    cache_dir: str,
) -> str:
    """Load the Kokoro model inside the worker process.

    Downloads the checkpoint if needed, moves the model to the requested
    device, stashes everything in the worker-local ``_state``, and warms up
    a pipeline for the default voice's language.

    Returns:
        The concrete device string the model ended up on.

    """
    import torch  # noqa: PLC0415
    from kokoro import KModel, KPipeline  # noqa: PLC0415

    set_process_title("tts-kokoro")

    # Resolve (and possibly download) the checkpoint; config sits next to it.
    model_path = _resolve_model_path(model_name, Path(cache_dir))
    config_path = model_path.parent / "config.json"

    # "auto" defers device selection until we are inside the subprocess.
    if device == "auto":
        device = get_torch_device()

    model = KModel(config=str(config_path), model=str(model_path)).eval()
    if device == "cuda":
        model = model.cuda()
    elif device == "mps":
        model = model.to(torch.device("mps"))

    # Persist in subprocess state so later synthesis calls reuse the model.
    _state.model = model
    _state.device = device
    _state.pipelines = {}

    # Pre-build the pipeline for the default voice's language so the first
    # request does not pay the construction cost.
    lang = DEFAULT_VOICE[0]
    logger.info("Warming up pipeline for lang_code '%s'...", lang)
    _state.pipelines[lang] = KPipeline(lang_code=lang, model=model, device=device)

    return device
177
+
178
+
179
def _synthesize_in_subprocess(
    text: str,
    voice: str | None,
    speed: float,
    cache_dir: str,
) -> dict[str, Any]:
    """Synthesize *text* to a complete WAV blob inside the worker process.

    Returns:
        Dict with raw WAV bytes ("audio"), "sample_rate", and "duration"
        in seconds.

    Raises:
        RuntimeError: If the pipeline yields no audio for the text.

    """
    import numpy as np  # noqa: PLC0415

    pipeline, voice_path = _get_pipeline(voice, cache_dir)

    # Collect every audio segment the pipeline emits for this text.
    audio_chunks = [
        r.audio.numpy()
        for r in pipeline(text, voice=voice_path, speed=speed, model=_state.model)
        if r.audio is not None
    ]
    if not audio_chunks:
        msg = "No audio generated"
        raise RuntimeError(msg)

    # Convert float audio to int16 PCM. Clip first: model output can
    # occasionally exceed [-1, 1], and a bare int16 cast would wrap around
    # into loud artifacts instead of saturating.
    audio = np.concatenate(audio_chunks)
    audio_int16 = (np.clip(audio, -1.0, 1.0) * 32767).astype(np.int16)

    sample_rate = constants.KOKORO_DEFAULT_SAMPLE_RATE
    buffer = io.BytesIO()
    with wave.open(buffer, "wb") as wav:
        wav.setnchannels(1)  # mono
        wav.setsampwidth(2)  # 16-bit
        wav.setframerate(sample_rate)
        wav.writeframes(audio_int16.tobytes())

    return {
        "audio": buffer.getvalue(),
        "sample_rate": sample_rate,
        "duration": len(audio_int16) / sample_rate,
    }
217
+
218
+
219
def _synthesize_stream_in_subprocess(
    text: str,
    voice: str | None,
    speed: float,
    cache_dir: str,
    output_queue: Any,  # Manager queue proxy
) -> None:
    """Stream int16 PCM chunks through *output_queue* as Kokoro produces them.

    Sends a "done" message with chunk/sample statistics on success, or an
    "error" message carrying the exception on failure; never raises to the
    executor.
    """
    import numpy as np  # noqa: PLC0415

    writer = QueueWriter(output_queue)

    try:
        pipeline, voice_path = _get_pipeline(voice, cache_dir)

        chunk_count = 0
        total_samples = 0

        for result in pipeline(text, voice=voice_path, speed=speed, model=_state.model):
            if result.audio is None:
                continue
            # Clip before the int16 cast: values outside [-1, 1] would
            # otherwise wrap around instead of saturating.
            clipped = np.clip(result.audio.numpy(), -1.0, 1.0)
            audio_int16 = (clipped * 32767).astype(np.int16)
            writer.send_data(audio_int16.tobytes())
            chunk_count += 1
            total_samples += len(audio_int16)

        sample_rate = constants.KOKORO_DEFAULT_SAMPLE_RATE
        writer.send_done(
            {
                "chunk_count": chunk_count,
                "total_samples": total_samples,
                "duration": total_samples / sample_rate,
                "sample_rate": sample_rate,
            },
        )

    except Exception as e:
        # Forward the failure to the parent process instead of dying silently.
        writer.send_error(e)
257
+
258
+
259
class KokoroBackend:
    """Kokoro TTS backend with subprocess isolation.

    Uses kokoro library for high-quality neural TTS on CUDA, MPS, or CPU.
    Models and voices auto-download from HuggingFace on first use.
    Subprocess terminates on unload, releasing all GPU/CPU memory.
    """

    def __init__(self, config: BackendConfig) -> None:
        """Initialize the Kokoro backend."""
        self._config = config
        self._executor: ProcessPoolExecutor | None = None
        self._device: str | None = None
        self._cache_dir = config.cache_dir or get_backend_cache_dir("kokoro")

    @property
    def is_loaded(self) -> bool:
        """Whether the worker subprocess (and thus the model) is alive."""
        return self._executor is not None

    @property
    def device(self) -> str | None:
        """Device the model is loaded on, or None when unloaded."""
        return self._device

    @property
    def supports_streaming(self) -> bool:
        """Kokoro backend supports streaming synthesis."""
        return True

    def _check_request(self, text: str) -> None:
        """Validate preconditions shared by both synthesis entry points."""
        if self._executor is None:
            msg = "Model not loaded. Call load() first."
            raise RuntimeError(msg)
        if not text or not text.strip():
            msg = "Text cannot be empty"
            raise InvalidTextError(msg)

    async def load(self) -> float:
        """Spawn the worker subprocess and load the model there.

        Downloads from HuggingFace if needed. Returns the load duration in
        seconds (0.0 when already loaded).
        """
        if self._executor is not None:
            return 0.0

        started = time.time()
        # "spawn" keeps the worker free of inherited fork/CUDA state.
        self._executor = ProcessPoolExecutor(
            max_workers=1,
            mp_context=get_context("spawn"),
        )

        self._device = await asyncio.get_running_loop().run_in_executor(
            self._executor,
            _load_model_in_subprocess,
            self._config.model_name,
            self._config.device,
            str(self._cache_dir),
        )

        elapsed = time.time() - started
        logger.info("Loaded Kokoro model on %s in %.2fs", self._device, elapsed)
        return elapsed

    async def unload(self) -> None:
        """Shut the worker subprocess down, releasing all of its memory."""
        if self._executor is None:
            return
        self._executor.shutdown(wait=False, cancel_futures=True)
        self._executor = None
        self._device = None
        logger.info("Kokoro model unloaded (subprocess terminated)")

    async def synthesize(
        self,
        text: str,
        *,
        voice: str | None = None,
        speed: float = 1.0,
    ) -> SynthesisResult:
        """Synthesize *text* in the worker and return the complete WAV result.

        Raises:
            RuntimeError: If the model has not been loaded.
            InvalidTextError: If *text* is empty or whitespace.

        """
        self._check_request(text)

        payload = await asyncio.get_running_loop().run_in_executor(
            self._executor,
            _synthesize_in_subprocess,
            text,
            voice,
            speed,
            str(self._cache_dir),
        )

        return SynthesisResult(
            audio=payload["audio"],
            sample_rate=payload["sample_rate"],
            sample_width=2,
            channels=1,
            duration=payload["duration"],
        )

    async def synthesize_stream(
        self,
        text: str,
        *,
        voice: str | None = None,
        speed: float = 1.0,
    ) -> AsyncIterator[bytes]:
        """Yield raw int16 PCM chunks as the subprocess generates them.

        Raises:
            RuntimeError: If not loaded, or if the worker reports an error.
            InvalidTextError: If *text* is empty or whitespace.

        """
        self._check_request(text)

        # A Manager queue proxy is required here: only proxies can be handed
        # to an already-running ProcessPoolExecutor worker.
        manager = Manager()
        try:
            chunk_queue = manager.Queue(maxsize=10)  # bounded for backpressure
            loop = asyncio.get_running_loop()

            worker = loop.run_in_executor(
                self._executor,
                _synthesize_stream_in_subprocess,
                text,
                voice,
                speed,
                str(self._cache_dir),
                chunk_queue,  # type: ignore[arg-type]
            )

            reader = AsyncQueueReader(chunk_queue, timeout=30.0)  # type: ignore[arg-type]
            async for chunk in reader:
                if chunk.chunk_type == "done":
                    break
                if chunk.chunk_type == "error":
                    msg = str(chunk.payload)
                    raise RuntimeError(msg)
                if chunk.payload is not None:
                    yield chunk.payload  # type: ignore[misc]

            # Propagate any late failure from the worker task.
            await worker
        finally:
            manager.shutdown()
@@ -0,0 +1,253 @@
1
+ """Piper TTS backend using piper-tts library."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ import io
7
+ import logging
8
+ import time
9
+ import wave
10
+ from pathlib import Path
11
+ from typing import TYPE_CHECKING, Any, NoReturn
12
+
13
+ from agent_cli import constants
14
+ from agent_cli.server.tts.backends.base import (
15
+ BackendConfig,
16
+ InvalidTextError,
17
+ SynthesisResult,
18
+ get_backend_cache_dir,
19
+ )
20
+
21
+ if TYPE_CHECKING:
22
+ from piper import PiperVoice
23
+
24
+ logger = logging.getLogger(__name__)
25
+
26
+
27
def _load_model_sync(
    model_name: str,
    cache_dir: str | None,
) -> tuple[Any, int]:
    """Load a Piper voice synchronously (intended for an executor thread).

    Args:
        model_name: Either a path to an existing ``.onnx`` file, or a voice
            code such as ``en_US-lessac-medium`` to download.
        cache_dir: Directory for downloaded voices; the shared piper cache
            is used when None.

    Returns:
        Tuple of (PiperVoice, sample_rate).

    """
    from piper import PiperVoice  # noqa: PLC0415
    from piper.download_voices import download_voice  # noqa: PLC0415

    download_dir = Path(cache_dir) if cache_dir else get_backend_cache_dir("piper")
    download_dir.mkdir(parents=True, exist_ok=True)

    # A direct path to an .onnx file bypasses the download machinery.
    direct = Path(model_name)
    if direct.suffix == ".onnx" and direct.exists():
        voice = PiperVoice.load(str(direct), use_cuda=False)
        return voice, voice.config.sample_rate

    # Otherwise interpret model_name as a voice code, fetching it on demand.
    voice_code = model_name.strip()
    onnx_path = download_dir / f"{voice_code}.onnx"
    if not onnx_path.exists():
        logger.info("Downloading Piper voice: %s", voice_code)
        download_voice(voice_code, download_dir)

    voice = PiperVoice.load(str(onnx_path), use_cuda=False)
    return voice, voice.config.sample_rate
67
+
68
+
69
def _synthesize_sync(
    voice: PiperVoice,
    text: str,
    sample_rate: int,
    length_scale: float,
) -> tuple[bytes, float]:
    """Synthesize text to a WAV blob synchronously.

    Args:
        voice: Loaded PiperVoice instance.
        text: Text to synthesize.
        sample_rate: Sample rate from model config.
        length_scale: Length scale (inverse of speed).

    Returns:
        Tuple of (audio_bytes, duration_seconds).

    """
    from piper import SynthesisConfig  # noqa: PLC0415

    # Synthesis config carries the speed adjustment.
    syn_config = SynthesisConfig(length_scale=length_scale)

    buffer = io.BytesIO()
    total_frames = 0  # mono 16-bit => 2 bytes per frame
    with wave.open(buffer, "wb") as wav_file:
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)  # 16-bit
        wav_file.setframerate(sample_rate)

        for audio_chunk in voice.synthesize(text, syn_config):
            pcm = audio_chunk.audio_int16_bytes
            wav_file.writeframes(pcm)
            total_frames += len(pcm) // 2

    # Duration comes straight from the frames actually written, rather than
    # subtracting an assumed fixed WAV header size from the blob — correct
    # regardless of the exact header layout the wave module emits.
    duration = total_frames / sample_rate

    return buffer.getvalue(), duration
110
+
111
+
112
class PiperBackend:
    """Piper TTS backend using ONNX-based synthesis.

    This backend uses the piper-tts library for fast, CPU-friendly TTS.
    Models are downloaded from HuggingFace on first use.
    """

    def __init__(self, config: BackendConfig) -> None:
        """Initialize the Piper backend.

        Args:
            config: Backend configuration.

        """
        self._config = config
        self._voice: PiperVoice | None = None
        # Placeholder until load() reads the real rate from the model config.
        self._sample_rate: int = constants.PIPER_DEFAULT_SAMPLE_RATE
        self._device: str | None = None

    @property
    def is_loaded(self) -> bool:
        """Whether a voice model is currently resident in memory."""
        return self._voice is not None

    @property
    def device(self) -> str | None:
        """Device the model runs on, or None when unloaded."""
        return self._device

    @property
    def supports_streaming(self) -> bool:
        """Piper backend does not support streaming synthesis."""
        return False

    async def load(self) -> float:
        """Load the voice model, downloading it on first use.

        Returns:
            Load duration in seconds (0.0 when already loaded).

        """
        if self._voice is not None:
            return 0.0

        started = time.time()

        # Piper is quick and CPU-only; the default thread executor suffices.
        cache = str(self._config.cache_dir) if self._config.cache_dir else None
        loop = asyncio.get_running_loop()
        self._voice, self._sample_rate = await loop.run_in_executor(
            None,
            _load_model_sync,
            self._config.model_name,
            cache,
        )
        self._device = "cpu"  # Piper is CPU-only

        elapsed = time.time() - started
        logger.info(
            "Loaded Piper model %s in %.2fs (sample_rate=%d)",
            self._config.model_name,
            elapsed,
            self._sample_rate,
        )

        return elapsed

    async def unload(self) -> None:
        """Drop the loaded voice so its memory can be reclaimed."""
        if self._voice is not None:
            logger.info("Unloading Piper model %s", self._config.model_name)
            self._voice = None
        self._device = None

    async def synthesize(
        self,
        text: str,
        *,
        voice: str | None = None,  # noqa: ARG002
        speed: float = 1.0,
    ) -> SynthesisResult:
        """Synthesize text to audio.

        Args:
            text: Text to synthesize.
            voice: Unused — for Piper the voice *is* the loaded model.
            speed: Speech speed multiplier (0.25 to 4.0).

        Returns:
            SynthesisResult with WAV audio data and metadata.

        Raises:
            InvalidTextError: If the text is empty or invalid.
            RuntimeError: If the model is not loaded.

        """
        if self._voice is None:
            msg = "Model not loaded"
            raise RuntimeError(msg)

        if not text or not text.strip():
            msg = "Text cannot be empty"
            raise InvalidTextError(msg)

        # Piper expresses speed as length_scale, its inverse (<1.0 = faster).
        # The API layer has already validated/clamped speed.
        length_scale = 1.0 / speed

        # Offload to an executor thread so the event loop is not blocked.
        # ONNX Runtime InferenceSession.run() is thread-safe since v1.10+,
        # so concurrent requests may share the one PiperVoice instance.
        loop = asyncio.get_running_loop()
        audio_data, duration = await loop.run_in_executor(
            None,
            _synthesize_sync,
            self._voice,
            text,
            self._sample_rate,
            length_scale,
        )

        return SynthesisResult(
            audio=audio_data,
            sample_rate=self._sample_rate,
            sample_width=2,  # 16-bit
            channels=1,  # Mono
            duration=duration,
        )

    def synthesize_stream(
        self,
        text: str,
        *,
        voice: str | None = None,
        speed: float = 1.0,
    ) -> NoReturn:
        """Streaming is not supported by Piper backend."""
        msg = "Streaming synthesis is not supported by Piper backend"
        raise NotImplementedError(msg)
+ raise NotImplementedError(msg)