agent-cli 0.70.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- agent_cli/__init__.py +5 -0
- agent_cli/__main__.py +6 -0
- agent_cli/_extras.json +14 -0
- agent_cli/_requirements/.gitkeep +0 -0
- agent_cli/_requirements/audio.txt +79 -0
- agent_cli/_requirements/faster-whisper.txt +215 -0
- agent_cli/_requirements/kokoro.txt +425 -0
- agent_cli/_requirements/llm.txt +183 -0
- agent_cli/_requirements/memory.txt +355 -0
- agent_cli/_requirements/mlx-whisper.txt +222 -0
- agent_cli/_requirements/piper.txt +176 -0
- agent_cli/_requirements/rag.txt +402 -0
- agent_cli/_requirements/server.txt +154 -0
- agent_cli/_requirements/speed.txt +77 -0
- agent_cli/_requirements/vad.txt +155 -0
- agent_cli/_requirements/wyoming.txt +71 -0
- agent_cli/_tools.py +368 -0
- agent_cli/agents/__init__.py +23 -0
- agent_cli/agents/_voice_agent_common.py +136 -0
- agent_cli/agents/assistant.py +383 -0
- agent_cli/agents/autocorrect.py +284 -0
- agent_cli/agents/chat.py +496 -0
- agent_cli/agents/memory/__init__.py +31 -0
- agent_cli/agents/memory/add.py +190 -0
- agent_cli/agents/memory/proxy.py +160 -0
- agent_cli/agents/rag_proxy.py +128 -0
- agent_cli/agents/speak.py +209 -0
- agent_cli/agents/transcribe.py +671 -0
- agent_cli/agents/transcribe_daemon.py +499 -0
- agent_cli/agents/voice_edit.py +291 -0
- agent_cli/api.py +22 -0
- agent_cli/cli.py +106 -0
- agent_cli/config.py +503 -0
- agent_cli/config_cmd.py +307 -0
- agent_cli/constants.py +27 -0
- agent_cli/core/__init__.py +1 -0
- agent_cli/core/audio.py +461 -0
- agent_cli/core/audio_format.py +299 -0
- agent_cli/core/chroma.py +88 -0
- agent_cli/core/deps.py +191 -0
- agent_cli/core/openai_proxy.py +139 -0
- agent_cli/core/process.py +195 -0
- agent_cli/core/reranker.py +120 -0
- agent_cli/core/sse.py +87 -0
- agent_cli/core/transcription_logger.py +70 -0
- agent_cli/core/utils.py +526 -0
- agent_cli/core/vad.py +175 -0
- agent_cli/core/watch.py +65 -0
- agent_cli/dev/__init__.py +14 -0
- agent_cli/dev/cli.py +1588 -0
- agent_cli/dev/coding_agents/__init__.py +19 -0
- agent_cli/dev/coding_agents/aider.py +24 -0
- agent_cli/dev/coding_agents/base.py +167 -0
- agent_cli/dev/coding_agents/claude.py +39 -0
- agent_cli/dev/coding_agents/codex.py +24 -0
- agent_cli/dev/coding_agents/continue_dev.py +15 -0
- agent_cli/dev/coding_agents/copilot.py +24 -0
- agent_cli/dev/coding_agents/cursor_agent.py +48 -0
- agent_cli/dev/coding_agents/gemini.py +28 -0
- agent_cli/dev/coding_agents/opencode.py +15 -0
- agent_cli/dev/coding_agents/registry.py +49 -0
- agent_cli/dev/editors/__init__.py +19 -0
- agent_cli/dev/editors/base.py +89 -0
- agent_cli/dev/editors/cursor.py +15 -0
- agent_cli/dev/editors/emacs.py +46 -0
- agent_cli/dev/editors/jetbrains.py +56 -0
- agent_cli/dev/editors/nano.py +31 -0
- agent_cli/dev/editors/neovim.py +33 -0
- agent_cli/dev/editors/registry.py +59 -0
- agent_cli/dev/editors/sublime.py +20 -0
- agent_cli/dev/editors/vim.py +42 -0
- agent_cli/dev/editors/vscode.py +15 -0
- agent_cli/dev/editors/zed.py +20 -0
- agent_cli/dev/project.py +568 -0
- agent_cli/dev/registry.py +52 -0
- agent_cli/dev/skill/SKILL.md +141 -0
- agent_cli/dev/skill/examples.md +571 -0
- agent_cli/dev/terminals/__init__.py +19 -0
- agent_cli/dev/terminals/apple_terminal.py +82 -0
- agent_cli/dev/terminals/base.py +56 -0
- agent_cli/dev/terminals/gnome.py +51 -0
- agent_cli/dev/terminals/iterm2.py +84 -0
- agent_cli/dev/terminals/kitty.py +77 -0
- agent_cli/dev/terminals/registry.py +48 -0
- agent_cli/dev/terminals/tmux.py +58 -0
- agent_cli/dev/terminals/warp.py +132 -0
- agent_cli/dev/terminals/zellij.py +78 -0
- agent_cli/dev/worktree.py +856 -0
- agent_cli/docs_gen.py +417 -0
- agent_cli/example-config.toml +185 -0
- agent_cli/install/__init__.py +5 -0
- agent_cli/install/common.py +89 -0
- agent_cli/install/extras.py +174 -0
- agent_cli/install/hotkeys.py +48 -0
- agent_cli/install/services.py +87 -0
- agent_cli/memory/__init__.py +7 -0
- agent_cli/memory/_files.py +250 -0
- agent_cli/memory/_filters.py +63 -0
- agent_cli/memory/_git.py +157 -0
- agent_cli/memory/_indexer.py +142 -0
- agent_cli/memory/_ingest.py +408 -0
- agent_cli/memory/_persistence.py +182 -0
- agent_cli/memory/_prompt.py +91 -0
- agent_cli/memory/_retrieval.py +294 -0
- agent_cli/memory/_store.py +169 -0
- agent_cli/memory/_streaming.py +44 -0
- agent_cli/memory/_tasks.py +48 -0
- agent_cli/memory/api.py +113 -0
- agent_cli/memory/client.py +272 -0
- agent_cli/memory/engine.py +361 -0
- agent_cli/memory/entities.py +43 -0
- agent_cli/memory/models.py +112 -0
- agent_cli/opts.py +433 -0
- agent_cli/py.typed +0 -0
- agent_cli/rag/__init__.py +3 -0
- agent_cli/rag/_indexer.py +67 -0
- agent_cli/rag/_indexing.py +226 -0
- agent_cli/rag/_prompt.py +30 -0
- agent_cli/rag/_retriever.py +156 -0
- agent_cli/rag/_store.py +48 -0
- agent_cli/rag/_utils.py +218 -0
- agent_cli/rag/api.py +175 -0
- agent_cli/rag/client.py +299 -0
- agent_cli/rag/engine.py +302 -0
- agent_cli/rag/models.py +55 -0
- agent_cli/scripts/.runtime/.gitkeep +0 -0
- agent_cli/scripts/__init__.py +1 -0
- agent_cli/scripts/check_plugin_skill_sync.py +50 -0
- agent_cli/scripts/linux-hotkeys/README.md +63 -0
- agent_cli/scripts/linux-hotkeys/toggle-autocorrect.sh +45 -0
- agent_cli/scripts/linux-hotkeys/toggle-transcription.sh +58 -0
- agent_cli/scripts/linux-hotkeys/toggle-voice-edit.sh +58 -0
- agent_cli/scripts/macos-hotkeys/README.md +45 -0
- agent_cli/scripts/macos-hotkeys/skhd-config-example +5 -0
- agent_cli/scripts/macos-hotkeys/toggle-autocorrect.sh +12 -0
- agent_cli/scripts/macos-hotkeys/toggle-transcription.sh +37 -0
- agent_cli/scripts/macos-hotkeys/toggle-voice-edit.sh +37 -0
- agent_cli/scripts/nvidia-asr-server/README.md +99 -0
- agent_cli/scripts/nvidia-asr-server/pyproject.toml +27 -0
- agent_cli/scripts/nvidia-asr-server/server.py +255 -0
- agent_cli/scripts/nvidia-asr-server/shell.nix +32 -0
- agent_cli/scripts/nvidia-asr-server/uv.lock +4654 -0
- agent_cli/scripts/run-openwakeword.sh +11 -0
- agent_cli/scripts/run-piper-windows.ps1 +30 -0
- agent_cli/scripts/run-piper.sh +24 -0
- agent_cli/scripts/run-whisper-linux.sh +40 -0
- agent_cli/scripts/run-whisper-macos.sh +6 -0
- agent_cli/scripts/run-whisper-windows.ps1 +51 -0
- agent_cli/scripts/run-whisper.sh +9 -0
- agent_cli/scripts/run_faster_whisper_server.py +136 -0
- agent_cli/scripts/setup-linux-hotkeys.sh +72 -0
- agent_cli/scripts/setup-linux.sh +108 -0
- agent_cli/scripts/setup-macos-hotkeys.sh +61 -0
- agent_cli/scripts/setup-macos.sh +76 -0
- agent_cli/scripts/setup-windows.ps1 +63 -0
- agent_cli/scripts/start-all-services-windows.ps1 +53 -0
- agent_cli/scripts/start-all-services.sh +178 -0
- agent_cli/scripts/sync_extras.py +138 -0
- agent_cli/server/__init__.py +3 -0
- agent_cli/server/cli.py +721 -0
- agent_cli/server/common.py +222 -0
- agent_cli/server/model_manager.py +288 -0
- agent_cli/server/model_registry.py +225 -0
- agent_cli/server/proxy/__init__.py +3 -0
- agent_cli/server/proxy/api.py +444 -0
- agent_cli/server/streaming.py +67 -0
- agent_cli/server/tts/__init__.py +3 -0
- agent_cli/server/tts/api.py +335 -0
- agent_cli/server/tts/backends/__init__.py +82 -0
- agent_cli/server/tts/backends/base.py +139 -0
- agent_cli/server/tts/backends/kokoro.py +403 -0
- agent_cli/server/tts/backends/piper.py +253 -0
- agent_cli/server/tts/model_manager.py +201 -0
- agent_cli/server/tts/model_registry.py +28 -0
- agent_cli/server/tts/wyoming_handler.py +249 -0
- agent_cli/server/whisper/__init__.py +3 -0
- agent_cli/server/whisper/api.py +413 -0
- agent_cli/server/whisper/backends/__init__.py +89 -0
- agent_cli/server/whisper/backends/base.py +97 -0
- agent_cli/server/whisper/backends/faster_whisper.py +225 -0
- agent_cli/server/whisper/backends/mlx.py +270 -0
- agent_cli/server/whisper/languages.py +116 -0
- agent_cli/server/whisper/model_manager.py +157 -0
- agent_cli/server/whisper/model_registry.py +28 -0
- agent_cli/server/whisper/wyoming_handler.py +203 -0
- agent_cli/services/__init__.py +343 -0
- agent_cli/services/_wyoming_utils.py +64 -0
- agent_cli/services/asr.py +506 -0
- agent_cli/services/llm.py +228 -0
- agent_cli/services/tts.py +450 -0
- agent_cli/services/wake_word.py +142 -0
- agent_cli-0.70.5.dist-info/METADATA +2118 -0
- agent_cli-0.70.5.dist-info/RECORD +196 -0
- agent_cli-0.70.5.dist-info/WHEEL +4 -0
- agent_cli-0.70.5.dist-info/entry_points.txt +4 -0
- agent_cli-0.70.5.dist-info/licenses/LICENSE +21 -0
|
@@ -0,0 +1,201 @@
|
|
|
1
|
+
"""TTS model manager with TTL-based unloading."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
import time
|
|
7
|
+
from dataclasses import dataclass
|
|
8
|
+
from typing import TYPE_CHECKING
|
|
9
|
+
|
|
10
|
+
from agent_cli import constants
|
|
11
|
+
from agent_cli.server.model_manager import ModelConfig, ModelManager, ModelStats
|
|
12
|
+
from agent_cli.server.tts.backends import (
|
|
13
|
+
BackendConfig,
|
|
14
|
+
BackendType,
|
|
15
|
+
SynthesisResult,
|
|
16
|
+
create_backend,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
if TYPE_CHECKING:
|
|
20
|
+
from collections.abc import AsyncIterator
|
|
21
|
+
|
|
22
|
+
from agent_cli.server.tts.backends.base import TTSBackend
|
|
23
|
+
|
|
24
|
+
logger = logging.getLogger(__name__)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
@dataclass
class TTSModelConfig(ModelConfig):
    """Settings for a single TTS model.

    Adds the TTS backend selection on top of the fields inherited from
    ``ModelConfig``.
    """

    # Forwarded to ``create_backend`` when a ``TTSModelManager`` is built.
    backend_type: BackendType = "auto"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class TTSModelManager:
    """Manages a TTS model with TTL-based unloading.

    Thin wrapper around ModelManager that adds the synthesize() and
    synthesize_stream() methods and records per-request statistics on the
    wrapped manager's ``stats`` object.
    """

    def __init__(self, config: TTSModelConfig) -> None:
        """Initialize the TTS model manager.

        Args:
            config: Model settings. The model name, device, cache directory
                and backend type are used to create the backend; the whole
                config is also handed to the wrapped ``ModelManager``.

        """
        self.config = config
        backend = create_backend(
            BackendConfig(
                model_name=config.model_name,
                device=config.device,
                cache_dir=config.cache_dir,
            ),
            backend_type=config.backend_type,
        )
        self._manager = ModelManager(backend, config)

    @property
    def stats(self) -> ModelStats:
        """Get the model statistics."""
        return self._manager.stats

    @property
    def is_loaded(self) -> bool:
        """Check if the model is currently loaded."""
        return self._manager.is_loaded

    @property
    def device(self) -> str | None:
        """Get the device the model is loaded on."""
        return self._manager.device

    @property
    def active_requests(self) -> int:
        """Get the number of active requests."""
        return self._manager.active_requests

    @property
    def ttl_remaining(self) -> float | None:
        """Get seconds remaining before model unloads."""
        return self._manager.ttl_remaining

    async def start(self) -> None:
        """Start the TTL unload watcher."""
        await self._manager.start()

    async def stop(self) -> None:
        """Stop the manager and unload the model."""
        await self._manager.stop()

    async def get_model(self) -> TTSBackend:
        """Get the backend, loading it if necessary."""
        return await self._manager.get_model()

    async def unload(self) -> bool:
        """Unload the model from memory."""
        return await self._manager.unload()

    def _update_stats(self, text: str, synthesis_duration: float) -> None:
        """Update synthesis statistics.

        Args:
            text: The text that was synthesized; only its length is recorded.
            synthesis_duration: Elapsed seconds spent on the request.

        """
        stats = self._manager.stats
        stats.total_requests += 1
        stats.total_processing_seconds += synthesis_duration
        stats.extra["total_characters"] = stats.extra.get("total_characters", 0.0) + len(text)
        stats.extra["total_synthesis_seconds"] = (
            stats.extra.get("total_synthesis_seconds", 0.0) + synthesis_duration
        )

    async def synthesize(
        self,
        text: str,
        *,
        voice: str | None = None,
        speed: float = 1.0,
    ) -> SynthesisResult:
        """Synthesize text to audio.

        Args:
            text: Text to synthesize.
            voice: Voice to use (optional).
            speed: Speech speed multiplier (0.25 to 4.0).

        Returns:
            SynthesisResult with audio data and metadata.

        """
        # perf_counter() is monotonic, so the measured duration cannot go
        # negative or jump when the system clock is adjusted.
        start_time = time.perf_counter()

        async with self._manager.request():
            backend: TTSBackend = self._manager.backend  # type: ignore[assignment]
            result = await backend.synthesize(
                text,
                voice=voice,
                speed=speed,
            )

        # Measured from before the request context was entered, so it includes
        # whatever time entering that context took.
        synthesis_duration = time.perf_counter() - start_time

        self._update_stats(text, synthesis_duration)
        self._manager.stats.total_audio_seconds += result.duration

        logger.debug(
            "Synthesized %d chars to %.1fs audio in %.2fs (model=%s)",
            len(text),
            result.duration,
            synthesis_duration,
            self.config.model_name,
        )

        return result

    @property
    def supports_streaming(self) -> bool:
        """Check if the backend supports streaming synthesis."""
        backend: TTSBackend = self._manager.backend  # type: ignore[assignment]
        return backend.supports_streaming

    async def synthesize_stream(
        self,
        text: str,
        *,
        voice: str | None = None,
        speed: float = 1.0,
    ) -> AsyncIterator[bytes]:
        """Stream synthesized audio chunks as they are generated.

        Statistics are recorded only after the backend stream has been fully
        consumed; if the consumer stops iterating early or the backend raises,
        the code after the loop does not run and nothing is recorded.

        Args:
            text: Text to synthesize.
            voice: Voice to use (optional).
            speed: Speech speed multiplier.

        Yields:
            Audio chunks exactly as produced by the backend.

        Raises:
            RuntimeError: If the backend does not support streaming.

        """
        # Monotonic clock: immune to wall-clock adjustments during long streams.
        start_time = time.perf_counter()
        chunk_count = 0
        total_bytes = 0

        async with self._manager.request():
            backend: TTSBackend = self._manager.backend  # type: ignore[assignment]

            if not backend.supports_streaming:
                msg = "Backend does not support streaming"
                raise RuntimeError(msg)

            async for chunk in backend.synthesize_stream(
                text,
                voice=voice,
                speed=speed,
            ):
                chunk_count += 1
                total_bytes += len(chunk)
                yield chunk

        synthesis_duration = time.perf_counter() - start_time

        # Calculate audio duration from PCM bytes (16-bit mono).
        # NOTE(review): assumes every streaming backend emits 16-bit mono PCM at
        # the Kokoro sample rate - confirm if another backend gains streaming.
        bytes_per_second = constants.KOKORO_DEFAULT_SAMPLE_RATE * 2  # 2 bytes per sample
        audio_seconds = total_bytes / bytes_per_second

        self._update_stats(text, synthesis_duration)
        self._manager.stats.total_audio_seconds += audio_seconds
        self._manager.stats.extra["streaming_requests"] = (
            self._manager.stats.extra.get("streaming_requests", 0) + 1
        )

        logger.debug(
            "Streamed %d chars to %.1fs audio in %d chunks (%d bytes) in %.2fs (model=%s)",
            len(text),
            audio_seconds,
            chunk_count,
            total_bytes,
            synthesis_duration,
            self.config.model_name,
        )
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
"""Registry for managing multiple TTS models."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from agent_cli.server.model_registry import ModelRegistry
|
|
6
|
+
from agent_cli.server.tts.model_manager import TTSModelConfig, TTSModelManager
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def create_tts_registry(
    default_model: str | None = None,
) -> ModelRegistry[TTSModelManager, TTSModelConfig]:
    """Build the registry that holds the TTS model managers.

    Args:
        default_model: Model name passed to the registry as its default.

    Returns:
        A ``ModelRegistry`` that uses ``TTSModelManager`` as its manager factory.

    """
    registry = ModelRegistry(
        manager_factory=TTSModelManager,
        default_model=default_model,
    )
    return registry
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
# Registry type specialised for TTS managers and configs, so annotations can
# name it directly instead of spelling out the generic parameters.
TTSModelRegistry = ModelRegistry[TTSModelManager, TTSModelConfig]
|
|
@@ -0,0 +1,249 @@
|
|
|
1
|
+
"""Wyoming protocol handler for TTS server."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import logging
|
|
6
|
+
from functools import partial
|
|
7
|
+
from typing import TYPE_CHECKING
|
|
8
|
+
|
|
9
|
+
from wyoming.audio import AudioChunk, AudioStart, AudioStop
|
|
10
|
+
from wyoming.info import Attribution, Describe, Info, TtsProgram, TtsVoice
|
|
11
|
+
from wyoming.server import AsyncEventHandler, AsyncServer
|
|
12
|
+
from wyoming.tts import Synthesize
|
|
13
|
+
|
|
14
|
+
from agent_cli import constants
|
|
15
|
+
|
|
16
|
+
if TYPE_CHECKING:
|
|
17
|
+
from wyoming.event import Event
|
|
18
|
+
|
|
19
|
+
from agent_cli.server.tts.model_manager import TTSModelManager
|
|
20
|
+
from agent_cli.server.tts.model_registry import TTSModelRegistry
|
|
21
|
+
|
|
22
|
+
logger = logging.getLogger(__name__)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class WyomingTTSHandler(AsyncEventHandler):
    """Wyoming event handler for TTS.

    Handles the Wyoming protocol for TTS (Text-to-Speech):
    - Receives Synthesize event with text
    - Synthesizes audio
    - Returns AudioStart, AudioChunk(s), AudioStop
    - Answers Describe events with an Info listing the registered models
    """

    def __init__(
        self,
        registry: TTSModelRegistry,
        *args: object,
        **kwargs: object,
    ) -> None:
        """Initialize the handler.

        Args:
            registry: Model registry for getting TTS models.
            *args: Passed to parent class.
            **kwargs: Passed to parent class.

        """
        super().__init__(*args, **kwargs)
        self._registry = registry
        # True once AudioStart has been written for the request in flight, so
        # the error path does not emit a second AudioStart mid-stream.
        self._audio_started = False

    async def handle_event(self, event: Event) -> bool:
        """Handle a Wyoming event.

        Args:
            event: The event to handle.

        Returns:
            True to continue processing events, False to stop.

        """
        if Synthesize.is_type(event.type):
            return await self._handle_synthesize(event)

        if Describe.is_type(event.type):
            return await self._handle_describe()

        # Unknown event types are ignored and the connection stays open.
        return True

    @staticmethod
    def _voice_name(voice: object) -> str | None:
        """Return the plain voice name for a Synthesize event's ``voice`` field.

        Wyoming delivers the requested voice as a ``SynthesizeVoice`` object
        (with a ``name`` attribute) rather than a string, while the model
        manager expects ``str | None``. Strings are passed through unchanged
        and an empty or missing name becomes ``None``.
        """
        if voice is None or isinstance(voice, str):
            return voice or None
        return getattr(voice, "name", None) or None

    async def _send_audio_start(self, *, rate: int, width: int, channels: int) -> None:
        """Write an AudioStart event and remember that the stream was opened."""
        await self.write_event(
            AudioStart(rate=rate, width=width, channels=channels).event(),
        )
        self._audio_started = True

    async def _send_empty_audio(self) -> None:
        """Terminate the audio response without sending any further audio.

        Writes AudioStart only if none was sent for this request yet, then
        AudioStop, so the client always sees exactly one start/stop pair.
        """
        if not self._audio_started:
            await self._send_audio_start(
                rate=constants.PIPER_DEFAULT_SAMPLE_RATE,
                width=2,
                channels=1,
            )
        await self.write_event(AudioStop().event())

    async def _handle_synthesize(self, event: Event) -> bool:
        """Handle synthesize event - synthesize text to audio.

        Always returns False, which ends event processing for this connection
        once the response has been written.
        """
        synthesize = Synthesize.from_event(event)
        text = synthesize.text
        self._audio_started = False

        logger.debug("Synthesize: %s", text[:100] if text else "")

        if not text:
            logger.warning("Empty text received")
            # Send empty audio response
            await self._send_empty_audio()
            return False

        voice = self._voice_name(synthesize.voice)

        try:
            manager = self._registry.get_manager()

            if manager.supports_streaming:
                await self._synthesize_streaming(manager, text, voice)
            else:
                await self._synthesize_complete(manager, text, voice)

        except Exception:
            # Connection-level boundary: log the failure and still close the
            # audio response so the client is not left waiting.
            logger.exception("Wyoming synthesis failed")
            await self._send_empty_audio()

        return False

    async def _synthesize_streaming(
        self,
        manager: TTSModelManager,
        text: str,
        voice: str | None,
    ) -> None:
        """Stream audio chunks as they're generated."""
        # NOTE(review): the stream is announced as 16-bit mono at the Kokoro
        # sample rate regardless of backend - confirm if another backend
        # gains streaming support.
        sample_rate = constants.KOKORO_DEFAULT_SAMPLE_RATE

        # Send audio start
        await self._send_audio_start(rate=sample_rate, width=2, channels=1)

        chunk_count = 0
        total_bytes = 0
        async for chunk in manager.synthesize_stream(text, voice=voice, speed=1.0):
            await self.write_event(
                AudioChunk(audio=chunk, rate=sample_rate, width=2, channels=1).event(),
            )
            chunk_count += 1
            total_bytes += len(chunk)

        await self.write_event(AudioStop().event())

        # Calculate duration from PCM bytes (16-bit mono)
        duration = total_bytes / (sample_rate * 2)
        logger.info(
            "Wyoming streaming synthesis: %d chars -> %.1fs audio in %d chunks",
            len(text),
            duration,
            chunk_count,
        )

    async def _synthesize_complete(
        self,
        manager: TTSModelManager,
        text: str,
        voice: str | None,
    ) -> None:
        """Synthesize complete audio then send in chunks."""
        result = await manager.synthesize(text, voice=voice, speed=1.0)

        # Send audio start
        await self._send_audio_start(
            rate=result.sample_rate,
            width=result.sample_width,
            channels=result.channels,
        )

        # Send audio data - skip WAV header to get raw PCM. A payload no longer
        # than the header is forwarded untouched.
        pcm_data = (
            result.audio[constants.WAV_HEADER_SIZE :]
            if len(result.audio) > constants.WAV_HEADER_SIZE
            else result.audio
        )

        # Send in chunks
        chunk_size = 4096
        for i in range(0, len(pcm_data), chunk_size):
            chunk = pcm_data[i : i + chunk_size]
            await self.write_event(
                AudioChunk(
                    audio=chunk,
                    rate=result.sample_rate,
                    width=result.sample_width,
                    channels=result.channels,
                ).event(),
            )

        await self.write_event(AudioStop().event())

        logger.info(
            "Wyoming synthesis: %d chars -> %.1fs audio",
            len(text),
            result.duration,
        )

    async def _handle_describe(self) -> bool:
        """Handle describe event - return server capabilities."""
        logger.debug("Describe event")

        # Get list of available models as voices.
        # NOTE(review): every model is labelled and attributed as Piper here,
        # whatever backend serves it - confirm this is intended.
        voices = [
            TtsVoice(
                name=status.name,
                description=f"Piper TTS {status.name}",
                attribution=Attribution(
                    name="Piper",
                    url="https://github.com/rhasspy/piper",
                ),
                installed=True,
                # Extract language from model name (e.g., "en_US-lessac-medium" -> "en")
                languages=[status.name.split("_")[0] if "_" in status.name else "en"],
                version="1.0",
            )
            for status in self._registry.list_status()
        ]

        await self.write_event(
            Info(
                tts=[
                    TtsProgram(
                        name="agent-cli-tts",
                        description="Agent CLI TTS Server with TTL-based model unloading",
                        attribution=Attribution(
                            name="agent-cli",
                            url="https://github.com/basnijholt/agent-cli",
                        ),
                        installed=True,
                        version="1.0",
                        voices=voices,
                    ),
                ],
            ).event(),
        )
        return True
|
|
230
|
+
|
|
231
|
+
|
|
232
|
+
async def start_wyoming_server(
    registry: TTSModelRegistry,
    uri: str = "tcp://0.0.0.0:10200",
) -> None:
    """Run the Wyoming TTS server until ``AsyncServer.run`` returns.

    Args:
        registry: Model registry handed to every connection handler.
        uri: URI to bind the server to (e.g., "tcp://0.0.0.0:10200").

    """
    wyoming_server = AsyncServer.from_uri(uri)
    logger.debug("Wyoming TTS server listening on %s", uri)

    def make_handler(*args: object, **kwargs: object) -> WyomingTTSHandler:
        """Create a handler for one connection, bound to the shared registry."""
        return WyomingTTSHandler(registry, *args, **kwargs)

    await wyoming_server.run(make_handler)
|