smart-tts 1.9.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38) hide show
  1. smart_tts-1.9.0/.env.example +13 -0
  2. smart_tts-1.9.0/.github/workflows/publish.yml +83 -0
  3. smart_tts-1.9.0/.gitignore +72 -0
  4. smart_tts-1.9.0/.vscode/launch.json +23 -0
  5. smart_tts-1.9.0/LICENSE +21 -0
  6. smart_tts-1.9.0/PKG-INFO +394 -0
  7. smart_tts-1.9.0/README.md +362 -0
  8. smart_tts-1.9.0/example.py +194 -0
  9. smart_tts-1.9.0/pyproject.toml +68 -0
  10. smart_tts-1.9.0/smart_tts/__init__.py +44 -0
  11. smart_tts-1.9.0/smart_tts/_version.py +24 -0
  12. smart_tts-1.9.0/smart_tts/async_tts.py +321 -0
  13. smart_tts-1.9.0/smart_tts/audio/__init__.py +4 -0
  14. smart_tts-1.9.0/smart_tts/audio/mixer.py +117 -0
  15. smart_tts-1.9.0/smart_tts/audio/probe.py +32 -0
  16. smart_tts-1.9.0/smart_tts/client/__init__.py +0 -0
  17. smart_tts-1.9.0/smart_tts/client/elevenlabs.py +159 -0
  18. smart_tts-1.9.0/smart_tts/client/fish.py +185 -0
  19. smart_tts-1.9.0/smart_tts/config.py +63 -0
  20. smart_tts-1.9.0/smart_tts/exceptions.py +39 -0
  21. smart_tts-1.9.0/smart_tts/models.py +136 -0
  22. smart_tts-1.9.0/smart_tts/script/__init__.py +3 -0
  23. smart_tts-1.9.0/smart_tts/script/breaks.py +44 -0
  24. smart_tts-1.9.0/smart_tts/templates.py +154 -0
  25. smart_tts-1.9.0/smart_tts/text.py +26 -0
  26. smart_tts-1.9.0/smart_tts/tts.py +331 -0
  27. smart_tts-1.9.0/smart_tts/voices/__init__.py +3 -0
  28. smart_tts-1.9.0/smart_tts/voices/registry.py +144 -0
  29. smart_tts-1.9.0/spec.md +435 -0
  30. smart_tts-1.9.0/templates/investigation.json +21 -0
  31. smart_tts-1.9.0/tests/conftest.py +40 -0
  32. smart_tts-1.9.0/tests/test_async_smart_tts.py +54 -0
  33. smart_tts-1.9.0/tests/test_breaks.py +14 -0
  34. smart_tts-1.9.0/tests/test_config.py +33 -0
  35. smart_tts-1.9.0/tests/test_fish_client.py +67 -0
  36. smart_tts-1.9.0/tests/test_smart_tts.py +72 -0
  37. smart_tts-1.9.0/tests/test_templates.py +126 -0
  38. smart_tts-1.9.0/uv.lock +583 -0
@@ -0,0 +1,13 @@
1
+ # Required
2
+ FISH_API_KEY=your_fish_api_key
3
+
4
+ # Optional — for music/ambient generation and mixing
5
+ ELEVENLABS_API_KEY=your_elevenlabs_api_key
6
+
7
+ # Optional
8
+ # Use s2.1-pro-free if you have no paid Fish credits (skips 402 fallback)
9
+ FISH_DEFAULT_MODEL=s2.1-pro
10
+ FISH_DEFAULT_VOICE_ID=67d37d81cb7340b391e9461d6671de03
11
+ ELEVENLABS_CACHE_DIR=~/.cache/smart-tts
12
+ ELEVENLABS_DEFAULT_OUTPUT_FORMAT=mp3_44100_128
13
+ FISH_API_URL=https://api.fish.audio/v1/tts
@@ -0,0 +1,83 @@
1
+ name: Publish to PyPI
2
+
3
+ on:
4
+ push:
5
+ tags:
6
+ - "v*"
7
+ workflow_dispatch:
8
+ inputs:
9
+ tag:
10
+ description: "Git tag to publish (e.g. v0.1.0)"
11
+ required: true
12
+ type: string
13
+
14
+ permissions:
15
+ contents: read
16
+ id-token: write
17
+
18
+ jobs:
19
+ publish:
20
+ runs-on: ubuntu-latest
21
+ environment:
22
+ name: pypi
23
+ url: https://pypi.org/p/smart-tts
24
+
25
+ steps:
26
+ - name: Resolve release tag
27
+ id: release
28
+ run: |
29
+ if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
30
+ TAG="${{ github.event.inputs.tag }}"
31
+ else
32
+ TAG="${GITHUB_REF_NAME}"
33
+ fi
34
+ if [[ ! "${TAG}" =~ ^v[0-9] ]]; then
35
+ echo "Tag must look like v0.1.0, got: ${TAG}" >&2
36
+ exit 1
37
+ fi
38
+ VERSION="${TAG#v}"
39
+ echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
40
+ echo "version=${VERSION}" >> "$GITHUB_OUTPUT"
41
+ echo "Publishing version: ${VERSION}"
42
+
43
+ - name: Checkout
44
+ uses: actions/checkout@v4
45
+ with:
46
+ fetch-depth: 0
47
+ ref: ${{ steps.release.outputs.tag }}
48
+
49
+ - name: Install uv
50
+ uses: astral-sh/setup-uv@v5
51
+ with:
52
+ enable-cache: true
53
+
54
+ - name: Set up Python
55
+ run: uv python install 3.11
56
+
57
+ - name: Build package
58
+ run: uv build
59
+
60
+ - name: Verify built version
61
+ run: |
62
+ BUILT_VERSION="$(uv run python - <<'PY'
63
+ import zipfile
64
+ from pathlib import Path
65
+
66
+ wheel = next(Path("dist").glob("*.whl"))
67
+ with zipfile.ZipFile(wheel) as archive:
68
+ for name in archive.namelist():
69
+ if name.endswith("METADATA"):
70
+ for line in archive.read(name).decode().splitlines():
71
+ if line.startswith("Version:"):
72
+ print(line.split(": ", 1)[1])
73
+ raise SystemExit(0)
74
+ raise SystemExit("Version not found in wheel metadata")
75
+ PY
76
+ )"
77
+ echo "Built wheel version: ${BUILT_VERSION}"
78
+ test "${BUILT_VERSION}" = "${{ steps.release.outputs.version }}"
79
+
80
+ - name: Publish to PyPI
81
+ uses: pypa/gh-action-pypi-publish@release/v1
82
+ with:
83
+ packages-dir: dist/
@@ -0,0 +1,72 @@
1
+ # Generated by hatch-vcs during build
2
+ smart_tts/_version.py
3
+
4
+ # Python
5
+ __pycache__/
6
+ *.py[cod]
7
+ *$py.class
8
+ *.so
9
+ *.egg
10
+ *.egg-info/
11
+ dist/
12
+ build/
13
+ *.manifest
14
+ *.spec
15
+
16
+ # Virtual environments
17
+ .venv/
18
+ venv/
19
+ env/
20
+ ENV/
21
+
22
+ # Packaging / lock tooling
23
+ pip-wheel-metadata/
24
+ .pip-cache/
25
+
26
+ # Tests & lint
27
+ .pytest_cache/
28
+ .ruff_cache/
29
+ .mypy_cache/
30
+ .coverage
31
+ .coverage.*
32
+ htmlcov/
33
+ .tox/
34
+ .nox/
35
+
36
+ # Secrets & local env
37
+ .env
38
+ .env.*
39
+ !.env.example
40
+
41
+ # Local diskcache / runtime data
42
+ .cache/
43
+ cache/
44
+ *.db
45
+ *.sqlite3
46
+
47
+ # Generated audio output
48
+ output/
49
+ output.*
50
+ *.mp3
51
+ *.wav
52
+ *.ogg
53
+ *.m4a
54
+
55
+ # IDE & editors
56
+ .vscode/*
57
+ !.vscode/launch.json
58
+ .idea/
59
+ *.swp
60
+ *.swo
61
+ *~
62
+
63
+ # OS
64
+ .DS_Store
65
+ Thumbs.db
66
+ desktop.ini
67
+
68
+ # Cursor
69
+ .cursor/
70
+
71
+ # Logs
72
+ *.log
@@ -0,0 +1,23 @@
1
+ {
2
+ // Use IntelliSense to learn about possible attributes.
3
+ // Hover to view descriptions of existing attributes.
4
+ // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
5
+ "version": "0.2.0",
6
+ "configurations": [
7
+
8
+ {
9
+ "name": "Python Debugger: Sync",
10
+ "type": "debugpy",
11
+ "request": "launch",
12
+ "program": "example.py",
13
+ "console": "integratedTerminal"
14
+ },
15
+ {
16
+ "name": "Python Debugger: Async",
17
+ "type": "debugpy",
18
+ "request": "launch",
19
+ "program": "example_async.py",
20
+ "console": "integratedTerminal"
21
+ }
22
+ ]
23
+ }
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 bad.robot
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,394 @@
1
+ Metadata-Version: 2.4
2
+ Name: smart-tts
3
+ Version: 1.9.0
4
+ Summary: Smart TTS: Fish Audio speech, ElevenLabs music/ambient, ffmpeg mixing
5
+ Project-URL: Homepage, https://github.com/vpuhoff/smart-tts
6
+ Project-URL: Repository, https://github.com/vpuhoff/smart-tts
7
+ Project-URL: Issues, https://github.com/vpuhoff/smart-tts/issues
8
+ Author: vpuhoff
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: elevenlabs,fish-audio,smart-tts,text-to-speech,tts
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.11
17
+ Classifier: Programming Language :: Python :: 3.12
18
+ Classifier: Programming Language :: Python :: 3.13
19
+ Classifier: Topic :: Multimedia :: Sound/Audio :: Speech
20
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
21
+ Requires-Python: >=3.11
22
+ Requires-Dist: diskcache>=5.6
23
+ Requires-Dist: elevenlabs>=2.0
24
+ Requires-Dist: httpx>=0.27
25
+ Requires-Dist: python-dotenv>=1.0
26
+ Provides-Extra: dev
27
+ Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
28
+ Requires-Dist: pytest>=8.0; extra == 'dev'
29
+ Requires-Dist: respx>=0.21; extra == 'dev'
30
+ Requires-Dist: ruff>=0.4; extra == 'dev'
31
+ Description-Content-Type: text/markdown
32
+
33
+ # smart-tts
34
+
35
+ High-level Python library for expressive speech production: [Fish Audio](https://fish.audio) TTS, [ElevenLabs](https://elevenlabs.io) music and ambient beds, and ffmpeg layer mixing.
36
+
37
+ Pass raw text (optionally with SSML `<break>` tags) — the library converts pauses to Fish Audio paralanguage, synthesizes speech in one continuous pass, and can mix music and ambient underneath.
38
+
39
+ ## Features
40
+
41
+ - **SmartTTS facade** — one pipeline from text to audio
42
+ - **Fish Audio speech** — single-pass synthesis via `s2.1-pro` (falls back to `s2.1-pro-free` on HTTP 402 Payment Required)
43
+ - **SSML breaks → Fish S2 tags** — `<break time="1.2s"/>` becomes `[long pause]`
44
+ - **ElevenLabs beds** — optional music (`music.compose`) and ambient (`text_to_sound_effects`)
45
+ - **ffmpeg mixing** — speech + music + ambient with volume weights
46
+ - **Sync & async API** — `SmartTTS` / `AsyncSmartTTS` with the same signatures
47
+ - **Voice registry** — local `diskcache` for registered Fish `reference_id` voices
48
+
49
+ ## Requirements
50
+
51
+ - Python 3.11+
52
+ - `ffmpeg` and `ffprobe` in `PATH` (only when mixing layers)
53
+
54
+ ## Installation
55
+
56
+ ```bash
57
+ pip install smart-tts
58
+ ```
59
+
60
+ Or from source:
61
+
62
+ ```bash
63
+ git clone https://github.com/vpuhoff/smart-tts.git
64
+ cd smart-tts
65
+ uv sync --dev
66
+ ```
67
+
68
+ ## Quick start
69
+
70
+ 1. Copy `.env.example` to `.env` and set your API keys:
71
+
72
+ ```bash
73
+ cp .env.example .env
74
+ ```
75
+
76
+ 2. Run synthesis:
77
+
78
+ ```python
79
+ from pathlib import Path
80
+
81
+ from smart_tts import SmartTTS, SynthesisTask, VoiceSettings
82
+
83
+ tts = SmartTTS.from_env()
84
+ tts.sync_voices()
85
+
86
+ result = tts.synthesize_to_file(
87
+ SynthesisTask(
88
+ text='Центр, <break time="1.2s" /> на связи резидентура.',
89
+ language="ru",
90
+ style="serious",
91
+ emotion="serious",
92
+ voice_id="67d37d81cb7340b391e9461d6671de03",
93
+ voice_settings=VoiceSettings(temperature=0.7, speed=1.0),
94
+ ),
95
+ Path("output.mp3"),
96
+ )
97
+
98
+ print(result.enhanced_text)
99
+ ```
100
+
101
+ See [`example.py`](example.py) for a full demo: detective radio report with speech variants, custom music, and remix.
102
+
103
+ ```bash
104
+ uv run python example.py
105
+ uv run python example.py --variants 2
106
+ uv run python example.py --remix-only --music back.mp3
107
+ ```
108
+
109
+ ## Synthesis with music and ambient
110
+
111
+ Pass bed prompts or file paths in `SynthesisTask` — `synthesize()` generates speech, then mixes layers automatically:
112
+
113
+ ```python
114
+ result = tts.synthesize(
115
+ SynthesisTask(
116
+ text="...",
117
+ voice_id="your-fish-reference-id",
118
+ music_prompt="Melancholic noir piano, instrumental, no vocals",
119
+ ambient_prompt="Subtle radio hum, tape hiss, seamless loop",
120
+ music_volume=0.32,
121
+ ambient_volume=0.18,
122
+ bed_weight=0.68,
123
+ )
124
+ )
125
+ ```
126
+
127
+ Or provide pre-recorded files:
128
+
129
+ ```python
130
+ SynthesisTask(
131
+ text="...",
132
+ music_path="back.mp3",
133
+ ambient_path="ambient.wav",
134
+ )
135
+ ```
136
+
137
+ `ELEVENLABS_API_KEY` is required for API-generated beds. Custom files work without it.
138
+
139
+ ## Task parameters
140
+
141
+ ### Core fields
142
+
143
+ | Field | Description |
144
+ |-------|-------------|
145
+ | `text` | Source script; SSML `<break time="Xs"/>` tags are converted to Fish pauses when `enhance_text=True` |
146
+ | `voice_id` | Fish Audio `reference_id` |
147
+ | `language` | Language hint (metadata / emotion mapping) |
148
+ | `style`, `emotion`, `use_case` | Context hints; `emotion` adds Fish paralanguage prefix |
149
+ | `enhance_text` | `True` — break conversion + emotion prefix; `False` — raw text |
150
+ | `voice_settings` | `temperature`, `speed`, `top_p`, `repetition_penalty` for Fish API |
151
+ | `model` | Fish model (see table below) |
152
+
153
+ ### Mixing fields
154
+
155
+ | Field | Default | Description |
156
+ |-------|---------|-------------|
157
+ | `music_prompt` | — | ElevenLabs Music API prompt |
158
+ | `ambient_prompt` | — | ElevenLabs Sound Effects API prompt |
159
+ | `music_path` | — | Pre-recorded music file |
160
+ | `ambient_path` | — | Pre-recorded ambient file |
161
+ | `music_volume` | `0.32` | Music level in mix |
162
+ | `ambient_volume` | `0.18` | Ambient level in mix |
163
+ | `speech_volume` | `1.0` | Speech gain in mix (`1.0` = unchanged) |
164
+ | `bed_weight` | `0.68` | Weight of the background bed relative to speech |
165
+
166
+ ### SSML breaks
167
+
168
+ ```python
169
+ # Input
170
+ 'Срочное донесение. <break time="1.2s" /> Обнаружена цель.'
171
+
172
+ # After enhance_text (Fish S2 [bracket] tags)
173
+ 'Срочное донесение. [long pause] Обнаружена цель.'
174
+ ```
175
+
176
+ | Pause | Fish markup |
177
+ |-------|-------------|
178
+ | ≥ 1.2 s | `[long pause]` |
179
+ | ≥ 0.75 s | `[pause]` |
180
+ | ≥ 0.4 s | `...` |
181
+
182
+ ### Emotion tags
183
+
184
+ Fish Audio S2/S2.1 interprets `[bracket]` tags as delivery hints (not spoken text). Parenthesized prose such as `(soft tone)` is **not** supported and may be read aloud.
185
+
186
+ | `emotion` | Tag added |
187
+ |-----------|-----------|
188
+ | `warm` | `[warm]` |
189
+ | `serious` | `[serious]` |
190
+ | `excited` | `[excited]` |
191
+ | `sad` | `[sad]` |
192
+ | `whisper` | `[whisper]` |
193
+ | `calm` | `[calm]` |
194
+
195
+ ## Configuration
196
+
197
+ ### Required
198
+
199
+ | Variable | Description |
200
+ |----------|-------------|
201
+ | `FISH_API_KEY` | Fish Audio API key |
202
+
203
+ ### Optional
204
+
205
+ | Variable | Default | Description |
206
+ |----------|---------|-------------|
207
+ | `ELEVENLABS_API_KEY` | — | For music/ambient generation |
208
+ | `FISH_DEFAULT_MODEL` | `s2.1-pro` | Fish model (`s2.1-pro-free` if no paid credits) |
209
+ | `FISH_DEFAULT_VOICE_ID` | Kanevsky ref id | Fallback `reference_id` |
210
+ | `FISH_API_URL` | `https://api.fish.audio/v1/tts` | Fish TTS endpoint |
211
+ | `ELEVENLABS_CACHE_DIR` | `~/.cache/smart-tts` | Voice registry cache |
212
+ | `ELEVENLABS_DEFAULT_OUTPUT_FORMAT` | `mp3_44100_128` | Output format |
213
+
214
+ Programmatic configuration:
215
+
216
+ ```python
217
+ from smart_tts import SmartTTS, SmartTTSConfig, TTSModel
218
+
219
+ config = SmartTTSConfig(
220
+ fish_api_key="...",
221
+ elevenlabs_api_key="...", # optional
222
+ default_model=TTSModel.ELEVEN_V3,
223
+ default_voice_id="67d37d81cb7340b391e9461d6671de03",
224
+ )
225
+ tts = SmartTTS(config)
226
+ ```
227
+
228
+ ## Usage
229
+
230
+ ### Synthesis pipeline
231
+
232
+ ```python
233
+ from smart_tts import SmartTTS, SynthesisTask, TTSModel, VoiceSettings
234
+
235
+ with SmartTTS.from_env() as tts:
236
+ tts.sync_voices()
237
+ result = tts.synthesize(
238
+ SynthesisTask(
239
+ text="Срочное донесение.",
240
+ voice_id="67d37d81cb7340b391e9461d6671de03",
241
+ model=TTSModel.ELEVEN_V3,
242
+ emotion="serious",
243
+ voice_settings=VoiceSettings(temperature=0.7),
244
+ )
245
+ )
246
+ audio_bytes = result.audio
247
+ prepared_text = result.enhanced_text
248
+ ```
249
+
250
+ ### Generation templates
251
+
252
+ Use `GenerationTemplate` to bundle speech, background, and mix settings. Pass only the script text at synthesis time:
253
+
254
+ ```python
255
+ from pathlib import Path
256
+
257
+ from smart_tts import INVESTIGATION, GenerationTemplate, SmartTTS, synthesize_with_template
258
+
259
+ # Built-in preset
260
+ with SmartTTS.from_env() as tts:
261
+ speech = tts.synthesize_text(
262
+ 'Срочное донесение. <break time="1.2s" /> Обнаружена цель.',
263
+ INVESTIGATION,
264
+ mix=False, # speech only
265
+ )
266
+ tts.synthesize_text_to_file(
267
+ "Конец связи.",
268
+ INVESTIGATION,
269
+ Path("output/speech.mp3"),
270
+ mix=False,
271
+ )
272
+ tts.remix_file(
273
+ Path("output/speech.mp3"),
274
+ Path("output/final.mp3"),
275
+ INVESTIGATION,
276
+ )
277
+
278
+ # Custom template or overrides
279
+ template = INVESTIGATION.with_overrides(
280
+ speech_volume=1.2,
281
+ music_path="back.mp3",
282
+ ambient_path=None,
283
+ )
284
+ result = synthesize_with_template("Привет!", template, mix=True)
285
+
286
+ # Load/save JSON recipes (see templates/investigation.json)
287
+ template = GenerationTemplate.from_json_file("templates/investigation.json")
288
+ ```
289
+
290
+ | Method | Description |
291
+ |--------|-------------|
292
+ | `template.to_task(text, mix=True, **overrides)` | Build `SynthesisTask` |
293
+ | `template.with_overrides(**kwargs)` | Copy with changed fields |
294
+ | `GenerationTemplate.from_dict()` / `from_json_file()` | Deserialize |
295
+ | `template.save_json(path)` | Serialize to JSON |
296
+ | `get_template("investigation")` | Built-in preset lookup |
297
+ | `tts.synthesize_text(text, template)` | Synthesize with template |
298
+ | `tts.remix_file(speech, output, template)` | Mix speech + beds |
299
+
300
+ ### Preview prepared text without TTS
301
+
302
+ ```python
303
+ prepared = tts.enhance_text_only(
304
+ SynthesisTask(
305
+ text='Центр, <break time="1.2s" /> на связи.',
306
+ emotion="warm",
307
+ )
308
+ )
309
+ ```
310
+
311
+ ### One-liner
312
+
313
+ ```python
314
+ from smart_tts import synthesize
315
+
316
+ result = synthesize(
317
+ "Привет, мир!",
318
+ language="ru",
319
+ style="neutral",
320
+ )
321
+ ```
322
+
323
+ ### Async API
324
+
325
+ ```python
326
+ import asyncio
327
+ from pathlib import Path
328
+
329
+ from smart_tts import AsyncSmartTTS, SynthesisTask, asynthesize
330
+
331
+ async def main() -> None:
332
+ async with AsyncSmartTTS.from_env() as tts:
333
+ result = await tts.synthesize_to_file(
334
+ SynthesisTask(text="Привет!", language="ru"),
335
+ Path("output.mp3"),
336
+ )
337
+ print(result.enhanced_text)
338
+
339
+ asyncio.run(main())
340
+ ```
341
+
342
+ ### Voice registry
343
+
344
+ ```python
345
+ voices = tts.list_voices()
346
+ voice = tts.get_voice("reference-id")
347
+ tts.sync_voices(force=True) # refresh default voice in cache
348
+ ```
349
+
350
+ Fish voices are referenced by `reference_id` from the Fish Audio console. Register custom voices via `VoiceRegistry.register_voice()` or set `FISH_DEFAULT_VOICE_ID`.
351
+
352
+ ## Models (`TTSModel`)
353
+
354
+ Legacy enum names map to Fish Audio models:
355
+
356
+ | Enum | Fish model | Notes |
357
+ |------|------------|-------|
358
+ | `TTSModel.ELEVEN_V3` | `s2.1-pro` | Default; auto-fallback to `s2.1-pro-free` on 402 |
359
+ | `TTSModel.ELEVEN_MULTILINGUAL_V2` | `s2-pro` | |
360
+ | `TTSModel.ELEVEN_FLASH_V2_5` | `s1` | |
361
+
362
+ ## Package layout
363
+
364
+ ```
365
+ smart_tts/
366
+ ├── tts.py, async_tts.py # SmartTTS facade
367
+ ├── templates.py # GenerationTemplate presets
368
+ ├── config.py, models.py
369
+ ├── client/
370
+ │ ├── fish.py # Fish Audio TTS
371
+ │ └── elevenlabs.py # Music + ambient beds
372
+ ├── script/breaks.py # SSML → Fish paralanguage
373
+ ├── audio/mixer.py # ffmpeg mix_tracks
374
+ ├── text.py # prepare_text()
375
+ └── voices/registry.py # diskcache voice registry
376
+ ```
377
+
378
+ ## Development
379
+
380
+ ```bash
381
+ uv sync --dev
382
+ uv run pytest
383
+ uv run ruff check .
384
+ ```
385
+
386
+ ## License
387
+
388
+ MIT — see [LICENSE](LICENSE).
389
+
390
+ ## Links
391
+
392
+ - [GitHub repository](https://github.com/vpuhoff/smart-tts)
393
+ - [PyPI package](https://pypi.org/project/smart-tts/)
394
+ - [Design specification (Russian)](spec.md)