@biggora/claude-plugins 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/.claude/settings.local.json +13 -0
  2. package/CLAUDE.md +55 -0
  3. package/LICENSE +1 -1
  4. package/README.md +208 -39
  5. package/bin/cli.js +39 -0
  6. package/package.json +30 -17
  7. package/registry/registry.json +166 -1
  8. package/registry/schema.json +10 -0
  9. package/src/commands/skills/add.js +194 -0
  10. package/src/commands/skills/list.js +52 -0
  11. package/src/commands/skills/remove.js +27 -0
  12. package/src/commands/skills/update.js +74 -0
  13. package/src/config.js +5 -0
  14. package/src/skills/codex-cli/SKILL.md +265 -0
  15. package/src/skills/commafeed-api/SKILL.md +1012 -0
  16. package/src/skills/gemini-cli/SKILL.md +379 -0
  17. package/src/skills/gemini-cli/references/commands.md +145 -0
  18. package/src/skills/gemini-cli/references/configuration.md +182 -0
  19. package/src/skills/gemini-cli/references/headless-and-scripting.md +181 -0
  20. package/src/skills/gemini-cli/references/mcp-and-extensions.md +254 -0
  21. package/src/skills/n8n-api/SKILL.md +623 -0
  22. package/src/skills/notebook-lm/SKILL.md +217 -0
  23. package/src/skills/notebook-lm/references/artifact-options.md +168 -0
  24. package/src/skills/notebook-lm/references/auth.md +58 -0
  25. package/src/skills/notebook-lm/references/workflows.md +144 -0
  26. package/src/skills/screen-recording/SKILL.md +309 -0
  27. package/src/skills/screen-recording/references/approach1-programmatic.md +311 -0
  28. package/src/skills/screen-recording/references/approach2-xvfb.md +232 -0
  29. package/src/skills/screen-recording/references/design-patterns.md +168 -0
  30. package/src/skills/test-mobile-app/SKILL.md +212 -0
  31. package/src/skills/test-mobile-app/references/report-template.md +95 -0
  32. package/src/skills/test-mobile-app/references/setup-appium.md +154 -0
  33. package/src/skills/test-mobile-app/scripts/analyze_apk.py +164 -0
  34. package/src/skills/test-mobile-app/scripts/check_environment.py +116 -0
  35. package/src/skills/test-mobile-app/scripts/generate_report.py +250 -0
  36. package/src/skills/test-mobile-app/scripts/run_tests.py +326 -0
  37. package/src/skills/test-web-ui/SKILL.md +232 -0
  38. package/src/skills/test-web-ui/references/test_case_schema.md +102 -0
  39. package/src/skills/test-web-ui/scripts/discover.py +176 -0
  40. package/src/skills/test-web-ui/scripts/generate_report.py +237 -0
  41. package/src/skills/test-web-ui/scripts/run_tests.py +296 -0
  42. package/src/skills/text-to-speech/SKILL.md +236 -0
  43. package/src/skills/text-to-speech/references/espeak-cli.md +277 -0
  44. package/src/skills/text-to-speech/references/kokoro-onnx.md +124 -0
  45. package/src/skills/text-to-speech/references/online-engines.md +128 -0
  46. package/src/skills/text-to-speech/references/pyttsx3-espeak.md +143 -0
  47. package/src/skills/tm-search/SKILL.md +240 -0
  48. package/src/skills/tm-search/references/field-guide.md +79 -0
  49. package/src/skills/tm-search/references/scraping-fallback.md +140 -0
  50. package/src/skills/tm-search/scripts/tm_search.py +375 -0
  51. package/src/skills/wp-rest-api/SKILL.md +114 -0
  52. package/src/skills/wp-rest-api/references/authentication.md +18 -0
  53. package/src/skills/wp-rest-api/references/custom-content-types.md +20 -0
  54. package/src/skills/wp-rest-api/references/discovery-and-params.md +20 -0
  55. package/src/skills/wp-rest-api/references/responses-and-fields.md +30 -0
  56. package/src/skills/wp-rest-api/references/routes-and-endpoints.md +36 -0
  57. package/src/skills/wp-rest-api/references/schema.md +22 -0
  58. package/src/skills/youtube-search/SKILL.md +412 -0
  59. package/src/skills/youtube-search/references/parsing-examples.md +159 -0
  60. package/src/skills/youtube-search/references/youtube-api-quota.md +85 -0
  61. package/src/skills/youtube-thumbnail/SKILL.md +1060 -0
  62. package/tests/commands/info.test.js +49 -0
  63. package/tests/commands/install.test.js +36 -0
  64. package/tests/commands/list.test.js +66 -0
  65. package/tests/commands/publish.test.js +182 -0
  66. package/tests/commands/search.test.js +45 -0
  67. package/tests/commands/uninstall.test.js +29 -0
  68. package/tests/commands/update.test.js +59 -0
  69. package/tests/functional/skills-lifecycle.test.js +293 -0
  70. package/tests/helpers/fixtures.js +63 -0
  71. package/tests/integration/cli.test.js +83 -0
  72. package/tests/skills/add.test.js +138 -0
  73. package/tests/skills/list.test.js +63 -0
  74. package/tests/skills/remove.test.js +38 -0
  75. package/tests/skills/update.test.js +60 -0
  76. package/tests/unit/config.test.js +31 -0
  77. package/tests/unit/registry.test.js +79 -0
  78. package/tests/unit/utils.test.js +150 -0
  79. package/tests/validation/registry-schema.test.js +112 -0
  80. package/tests/validation/skills-validation.test.js +96 -0
@@ -0,0 +1,277 @@
1
+ # espeak-ng CLI Reference
2
+
3
+ Direct command-line usage of espeak-ng. Useful when you need fine-grained control over
4
+ phonemes, prosody, SSML, or want to avoid the Python pyttsx3 overhead.
5
+
6
+ ---
7
+
8
+ ## Basic Usage
9
+
10
+ ```bash
11
+ # Speak through the default audio device (add --stdout to pipe WAV to ffmpeg)
12
+ espeak-ng "Hello world"
13
+
14
+ # Save to WAV file
15
+ espeak-ng -w /tmp/output.wav "Hello world"
16
+
17
+ # Read from file
18
+ espeak-ng -w /tmp/output.wav -f /tmp/script.txt
19
+
20
+ # Read from stdin
21
+ echo "Hello world" | espeak-ng -w /tmp/output.wav
22
+ ```
23
+
24
+ ---
25
+
26
+ ## Key Flags
27
+
28
+ | Flag | Description | Example |
29
+ |------|-------------|---------|
30
+ | `-v <voice>` | Voice/language | `-v en-gb`, `-v ru`, `-v de` |
31
+ | `-s <rate>` | Speed in words/min (default 175) | `-s 140` |
32
+ | `-p <pitch>` | Pitch 0–99 (default 50) | `-p 55` |
33
+ | `-a <amplitude>` | Volume 0–200 (default 100) | `-a 120` |
34
+ | `-g <gap>` | Gap between words in 10ms units | `-g 5` |
35
+ | `-w <file>` | Write WAV output to file | `-w out.wav` |
36
+ | `-f <file>` | Read input from text file | `-f script.txt` |
37
+ | `--ipa` | Print IPA phonemes to stdout | `--ipa` |
38
+ | `-q` | Quiet — no audio, just phoneme output | `-q --ipa` |
39
+ | `--pho` | Output phoneme mnemonics | `--pho` |
40
+ | `-m` | Interpret input as SSML markup | `-m` |
41
+ | `-b 1` | Input is UTF-8 (default on Linux) | `-b 1` |
42
+ | `--punct` | Speak punctuation characters | `--punct` |
43
+ | `-z` | No final sentence pause | `-z` |
44
+
45
+ ---
46
+
47
+ ## Voice Selection
48
+
49
+ ```bash
50
+ # List all voices
51
+ espeak-ng --voices
52
+
53
+ # List voices for a language
54
+ espeak-ng --voices=en
55
+ espeak-ng --voices=ru
56
+ espeak-ng --voices=zh
57
+
58
+ # Key voice IDs
59
+ # English: en, en-gb, en-us, en-gb-scotland, en-gb-x-gbclan, en-gb-x-rp
60
+ # Russian: ru
61
+ # German: de
62
+ # French: fr
63
+ # Spanish: es, es-419 (Latin America)
64
+ # Chinese: cmn (Mandarin), yue (Cantonese)
65
+ # Japanese: ja
66
+ # Arabic: ar
67
+ # Hindi: hi
68
+ # Korean: ko
69
+ # Italian: it
70
+ # Dutch: nl
71
+ # Polish: pl
72
+ # Ukrainian: uk
73
+ # Turkish: tr
74
+ # Swedish: sv
75
+ # Portuguese: pt, pt-br
76
+ ```
77
+
78
+ ---
79
+
80
+ ## SSML Support
81
+
82
+ espeak-ng understands a subset of SSML. Pass `-m` flag to enable:
83
+
84
+ ```bash
85
+ espeak-ng -m -w /tmp/ssml_out.wav '<speak>
86
+ Hello, <break time="500ms"/> how are you?
87
+ <prosody rate="slow" pitch="+5st">This part is slower and higher.</prosody>
88
+ <emphasis level="strong">Important point here.</emphasis>
89
+ Back to normal speed now.
90
+ </speak>'
91
+ ```
92
+
93
+ ### Supported SSML Tags
94
+
95
+ ```xml
96
+ <!-- Pause -->
97
+ <break time="300ms"/>
98
+ <break time="1s"/>
99
+
100
+ <!-- Prosody control -->
101
+ <prosody rate="slow">...</prosody> <!-- slow, medium, fast, x-slow, x-fast -->
102
+ <prosody rate="0.8">...</prosody> <!-- relative rate: 0.5–2.0 -->
103
+ <prosody pitch="+5st">...</prosody> <!-- semitones: -12st to +12st -->
104
+ <prosody pitch="high">...</prosody> <!-- x-low, low, medium, high, x-high -->
105
+ <prosody volume="loud">...</prosody> <!-- silent, x-soft, soft, medium, loud, x-loud -->
106
+
107
+ <!-- Emphasis -->
108
+ <emphasis level="strong">...</emphasis> <!-- none, reduced, moderate, strong -->
109
+
110
+ <!-- Say-as (number/date formatting) -->
111
+ <say-as interpret-as="cardinal">42</say-as>
112
+ <say-as interpret-as="ordinal">3</say-as>
113
+ <say-as interpret-as="characters">CPU</say-as>
114
+ <say-as interpret-as="date" format="ymd">2024-03-15</say-as>
115
+ <say-as interpret-as="time" format="hms24">14:30:00</say-as>
116
+
117
+ <!-- Phoneme (IPA or x-sampa) -->
118
+ <phoneme alphabet="ipa" ph="həˈloʊ">Hello</phoneme>
119
+ <phoneme alphabet="x-sampa" ph="h@'loU">Hello</phoneme>
120
+
121
+ <!-- Sub (spoken alias) -->
122
+ <sub alias="Artificial Intelligence">AI</sub>
123
+
124
+ <!-- Language switch -->
125
+ <voice xml:lang="fr">Bonjour</voice>
126
+ ```
127
+
128
+ ---
129
+
130
+ ## Phoneme Control
131
+
132
+ ```bash
133
+ # Get IPA phonemes for text
134
+ espeak-ng -q --ipa "Hello world"
135
+ # → həlˈəʊ wˈɜːld
136
+
137
+ # Get phoneme mnemonics (mbrola-style .pho output)
138
+ espeak-ng -q --pho "Hello world"
139
+
140
+ # Speak using IPA directly
141
+ espeak-ng -w /tmp/ipa.wav "<phoneme alphabet='ipa' ph='həˈloʊ'>Hello</phoneme>" -m
142
+
143
+ # Print phoneme mnemonics for text in a given language
144
+ espeak-ng -v en -q -x "Hello world"
145
+ ```
146
+
147
+ ---
148
+
149
+ ## Python Subprocess Integration
150
+
151
+ ```python
152
+ import subprocess
153
+
154
+ def espeak_tts(text: str, output_wav: str,
155
+ voice: str = "en-gb-scotland",
156
+ speed: int = 145,
157
+ pitch: int = 52,
158
+ amplitude: int = 110,
159
+ ssml: bool = False) -> str:
160
+ """
161
+ Generate TTS via espeak-ng CLI.
162
+ Returns path to output WAV file.
163
+ """
164
+ cmd = [
165
+ "espeak-ng",
166
+ "-v", voice,
167
+ "-s", str(speed),
168
+ "-p", str(pitch),
169
+ "-a", str(amplitude),
170
+ "-w", output_wav,
171
+ ]
172
+ if ssml:
173
+ cmd.append("-m")
174
+ cmd.append(text)
175
+
176
+ subprocess.run(cmd, check=True, capture_output=True)
177
+ return output_wav
178
+
179
+
180
+ def espeak_tts_file(input_txt: str, output_wav: str,
181
+ voice: str = "en-gb-scotland",
182
+ speed: int = 145) -> str:
183
+ """Generate TTS from a text file."""
184
+ subprocess.run([
185
+ "espeak-ng", "-v", voice, "-s", str(speed),
186
+ "-f", input_txt, "-w", output_wav
187
+ ], check=True, capture_output=True)
188
+ return output_wav
189
+
190
+
191
+ def get_ipa(text: str, lang: str = "en") -> str:
192
+ """Return IPA transcription of text."""
193
+ result = subprocess.run(
194
+ ["espeak-ng", "-v", lang, "-q", "--ipa", text],
195
+ capture_output=True, text=True
196
+ )
197
+ return result.stdout.strip()
198
+ ```
199
+
200
+ ---
201
+
202
+ ## WAV → MP3 Pipeline
203
+
204
+ ```bash
205
+ # Basic
206
+ ffmpeg -i /tmp/espeak.wav -c:a libmp3lame -b:a 192k /tmp/output.mp3 -y -loglevel quiet
207
+
208
+ # Enhanced speech quality (cleaner high-mids, reduced low rumble)
209
+ ffmpeg -i /tmp/espeak.wav \
210
+ -af "aresample=44100,equalizer=f=3000:t=o:w=1:g=3,equalizer=f=200:t=o:w=1:g=-2,loudnorm=I=-16:TP=-1.5:LRA=11" \
211
+ -c:a libmp3lame -b:a 192k /tmp/output.mp3 -y -loglevel quiet
212
+ ```
213
+
214
+ ---
215
+
216
+ ## Multi-voice Script (narrator + character)
217
+
218
+ ```python
219
+ import subprocess, os
220
+
221
+ def multi_voice_tts(lines: list[dict], output_mp3: str) -> str:
222
+ """
223
+ lines = [
224
+ {"text": "Welcome.", "voice": "en-gb-scotland", "speed": 140},
225
+ {"text": "Thank you.", "voice": "en-us", "speed": 160},
226
+ ]
227
+ """
228
+ wavs = []
229
+ for i, line in enumerate(lines):
230
+ wav = f"/tmp/mv_{i}.wav"
231
+ subprocess.run([
232
+ "espeak-ng",
233
+ "-v", line.get("voice", "en-gb-scotland"),
234
+ "-s", str(line.get("speed", 145)),
235
+ "-p", str(line.get("pitch", 50)),
236
+ "-w", wav,
237
+ line["text"]
238
+ ], check=True, capture_output=True)
239
+ wavs.append(wav)
240
+
241
+ concat = "/tmp/mv_concat.txt"
242
+ with open(concat, "w") as f:
243
+ for wav in wavs:
244
+ f.write(f"file '{wav}'\n")
245
+
246
+ subprocess.run([
247
+ "ffmpeg", "-f", "concat", "-safe", "0", "-i", concat,
248
+ "-c:a", "libmp3lame", "-b:a", "192k",
249
+ output_mp3, "-y", "-loglevel", "quiet"
250
+ ], check=True)
251
+
252
+ for wav in wavs:
253
+ os.unlink(wav)
254
+
255
+ return output_mp3
256
+ ```
257
+
258
+ ---
259
+
260
+ ## Useful Tuning Combinations
261
+
262
+ ```bash
263
+ # Natural male English (UK)
264
+ espeak-ng -v en-gb-scotland -s 145 -p 42 -a 110 -w out.wav "Text here"
265
+
266
+ # Natural female English (RP)
267
+ espeak-ng -v en-gb-x-rp -s 150 -p 62 -a 110 -w out.wav "Text here"
268
+
269
+ # Russian — clear and measured
270
+ espeak-ng -v ru -s 130 -p 50 -a 120 -w out.wav "Текст здесь"
271
+
272
+ # Fast technical narration (US English)
273
+ espeak-ng -v en-us -s 175 -p 48 -a 105 -w out.wav "Text here"
274
+
275
+ # Slow, deliberate presentation voice
276
+ espeak-ng -v en-gb-scotland -s 120 -p 50 -g 8 -a 115 -w out.wav "Text here"
277
+ ```
@@ -0,0 +1,124 @@
1
+ # Kokoro ONNX — High-Quality Neural TTS
2
+
3
+ Kokoro is a state-of-the-art offline neural TTS engine. Produces near-human quality audio.
4
+ Available in this environment (`kokoro-onnx` installed) but requires model files.
5
+
6
+ ## Languages Supported
7
+
8
+ English (US/UK), Chinese, Japanese, Korean, French, Spanish, Hindi, Portuguese, Italian, Brazilian Portuguese
9
+
10
+ ## Installation
11
+
12
+ ```bash
13
+ pip install kokoro-onnx soundfile --break-system-packages
14
+ ```
15
+
16
+ ## Model Download (requires internet once)
17
+
18
+ ```python
19
+ from huggingface_hub import hf_hub_download
20
+
21
+ # Download model files
22
+ model_path = hf_hub_download(repo_id="hexgrad/Kokoro-82M-ONNX", filename="kokoro-v1.0.onnx")
23
+ voices_path = hf_hub_download(repo_id="hexgrad/Kokoro-82M-ONNX", filename="voices-v1.0.bin")
24
+
25
+ print(f"Model: {model_path}")
26
+ print(f"Voices: {voices_path}")
27
+ ```
28
+
29
+ Or manually download and place in `/tmp/kokoro/`:
30
+ - `kokoro-v1.0.onnx`
31
+ - `voices-v1.0.bin`
32
+
33
+ ## Usage
34
+
35
+ ```python
36
+ from kokoro_onnx import Kokoro
37
+ import soundfile as sf
38
+ import subprocess
39
+
40
+ def kokoro_tts(text: str, output_mp3: str, voice: str = 'af_heart', speed: float = 1.0,
41
+ model_path: str = '/tmp/kokoro/kokoro-v1.0.onnx',
42
+ voices_path: str = '/tmp/kokoro/voices-v1.0.bin'):
43
+ """
44
+ Generate high-quality neural TTS with Kokoro.
45
+
46
+ Voices:
47
+ English (US): af_heart, af_bella, af_nicole, am_adam, am_michael
48
+ English (UK): bf_emma, bf_isabella, bm_george, bm_lewis
49
+ Japanese: jf_nezuko, jf_tsumugi, jm_kumo
50
+ Chinese: zf_xiaobei, zf_xiaoni, zm_yunjian
51
+ French: ff_siwis
52
+ Korean: kf_alpha
53
+ Spanish: es-419-af-dalia, es-419-am-diego
54
+ Hindi: hf_alpha, hm_omega
55
+ Italian: if_sara, im_nicola
56
+ Brazilian PT: pf_dora, pm_alex
57
+ Portuguese: ptf_edite
58
+ """
59
+ kokoro = Kokoro(model_path, voices_path)
60
+
61
+     samples, sample_rate = kokoro.create(text, voice=voice, speed=speed, lang="en-us")  # NOTE: lang is hard-coded to en-us; pass the matching code (see Language Code Mapping below) for non-English voices
62
+
63
+ wav_path = output_mp3.replace('.mp3', '.wav')
64
+ sf.write(wav_path, samples, sample_rate)
65
+
66
+ subprocess.run([
67
+ 'ffmpeg', '-i', wav_path,
68
+ '-c:a', 'libmp3lame', '-b:a', '192k',
69
+ output_mp3, '-y', '-loglevel', 'quiet'
70
+ ], check=True)
71
+
72
+ return output_mp3
73
+
74
+ # Example
75
+ kokoro_tts(
76
+ "Welcome to our product demo. This neural TTS produces natural speech.",
77
+ "/tmp/kokoro_output.mp3",
78
+ voice="af_heart"
79
+ )
80
+ ```
81
+
82
+ ## Language Code Mapping
83
+
84
+ ```python
85
+ KOKORO_LANG = {
86
+ 'en-us': 'en-us',
87
+ 'en-gb': 'en-gb',
88
+ 'ja': 'ja',
89
+ 'zh': 'zh',
90
+ 'fr': 'fr-fr',
91
+ 'ko': 'ko',
92
+ 'es': 'es',
93
+ 'hi': 'hi',
94
+ 'it': 'it',
95
+ 'pt': 'pt-br',
96
+ 'pt-pt': 'pt-pt',
97
+ }
98
+ ```
99
+
100
+ ## Caching Model Between Sessions
101
+
102
+ ```python
103
+ import os, shutil
104
+ from pathlib import Path
105
+
106
+ CACHE_DIR = Path('/home/claude/.kokoro_cache')
107
+ CACHE_DIR.mkdir(exist_ok=True)
108
+
109
+ MODEL_CACHED = CACHE_DIR / 'kokoro-v1.0.onnx'
110
+ VOICES_CACHED = CACHE_DIR / 'voices-v1.0.bin'
111
+
112
+ def get_or_download_kokoro():
113
+ if MODEL_CACHED.exists() and VOICES_CACHED.exists():
114
+ return str(MODEL_CACHED), str(VOICES_CACHED)
115
+
116
+ from huggingface_hub import hf_hub_download
117
+ model = hf_hub_download("hexgrad/Kokoro-82M-ONNX", "kokoro-v1.0.onnx")
118
+ voices = hf_hub_download("hexgrad/Kokoro-82M-ONNX", "voices-v1.0.bin")
119
+
120
+ shutil.copy(model, MODEL_CACHED)
121
+ shutil.copy(voices, VOICES_CACHED)
122
+
123
+ return str(MODEL_CACHED), str(VOICES_CACHED)
124
+ ```
@@ -0,0 +1,128 @@
1
+ # Online TTS Engines Reference
2
+
3
+ Use when internet access is available. Superior quality to offline engines.
4
+
5
+ ## Priority Order
6
+
7
+ 1. **OpenAI TTS** — best quality, called from JavaScript inside Claude artifacts
8
+ 2. **edge-tts** (Microsoft Azure) — free, neural quality, 400+ voices
9
+ 3. **gTTS** (Google) — free, good quality, 40+ languages
10
+
11
+ ---
12
+
13
+ ## edge-tts (Microsoft Neural TTS — FREE)
14
+
15
+ Best free online option. 400+ voices, 100+ languages, neural quality.
16
+
17
+ ```bash
18
+ pip install edge-tts --break-system-packages
19
+ ```
20
+
21
+ ```python
22
+ import asyncio
23
+ import edge_tts
24
+
25
+ async def edge_tts_generate(text: str, output_mp3: str, voice: str = "en-US-AriaNeural"):
26
+ communicate = edge_tts.Communicate(text, voice)
27
+ await communicate.save(output_mp3)
28
+ return output_mp3
29
+
30
+ # Sync wrapper
31
+ def tts(text: str, output_mp3: str, voice: str = "en-US-AriaNeural"):
32
+ asyncio.run(edge_tts_generate(text, output_mp3, voice))
33
+
34
+ # List voices
35
+ async def list_voices():
36
+ voices = await edge_tts.list_voices()
37
+ for v in voices:
38
+ print(v['ShortName'], v['Locale'], v['Gender'])
39
+ ```
40
+
41
+ ### Recommended Voices
42
+
43
+ ```python
44
+ EDGE_VOICES = {
45
+ 'en-us-f': 'en-US-AriaNeural', # US English, female, natural
46
+ 'en-us-m': 'en-US-GuyNeural', # US English, male
47
+ 'en-gb-f': 'en-GB-SoniaNeural', # UK English, female
48
+ 'ru-f': 'ru-RU-SvetlanaNeural', # Russian, female
49
+ 'ru-m': 'ru-RU-DmitryNeural', # Russian, male
50
+ 'de-f': 'de-DE-KatjaNeural', # German
51
+ 'fr-f': 'fr-FR-DeniseNeural', # French
52
+ 'es-f': 'es-ES-ElviraNeural', # Spanish
53
+ 'zh-f': 'zh-CN-XiaoxiaoNeural', # Chinese
54
+ 'ja-f': 'ja-JP-NanamiNeural', # Japanese
55
+ 'ar-f': 'ar-EG-SalmaNeural', # Arabic
56
+ 'hi-f': 'hi-IN-SwaraNeural', # Hindi
57
+ 'ko-f': 'ko-KR-SunHiNeural', # Korean
58
+ 'pt-f': 'pt-BR-FranciscaNeural', # Portuguese
59
+ 'it-f': 'it-IT-ElsaNeural', # Italian
60
+ 'nl-f': 'nl-NL-ColetteNeural', # Dutch
61
+ 'pl-f': 'pl-PL-AgnieszkaNeural', # Polish
62
+ }
63
+ ```
64
+
65
+ ---
66
+
67
+ ## gTTS (Google Text-to-Speech — FREE)
68
+
69
+ ```bash
70
+ pip install gtts --break-system-packages
71
+ ```
72
+
73
+ ```python
74
+ from gtts import gTTS
75
+ import subprocess
76
+
77
+ def gtts_generate(text: str, output_mp3: str, lang: str = 'en', slow: bool = False):
78
+ tts = gTTS(text=text, lang=lang, slow=slow)
79
+ tts.save(output_mp3)
80
+ return output_mp3
81
+
82
+ # Language codes: 'en', 'ru', 'de', 'fr', 'es', 'zh-CN', 'ja', 'ko', 'ar', 'hi', etc.
83
+ ```
84
+
85
+ ---
86
+
87
+ ## OpenAI TTS (via API — best quality)
88
+
89
+ Use when building Claude-powered artifacts. Call from JavaScript in artifacts:
90
+
91
+ ```javascript
92
+ // In artifact — OpenAI TTS via fetch
93
+ const response = await fetch("https://api.openai.com/v1/audio/speech", {
94
+ method: "POST",
95
+ headers: {
96
+ "Authorization": `Bearer ${OPENAI_KEY}`,
97
+ "Content-Type": "application/json"
98
+ },
99
+ body: JSON.stringify({
100
+ model: "tts-1-hd", // or "tts-1" for faster/cheaper
101
+ input: "Your text here",
102
+ voice: "alloy", // alloy, echo, fable, onyx, nova, shimmer
103
+ response_format: "mp3"
104
+ })
105
+ });
106
+ const audioBlob = await response.blob();
107
+ const url = URL.createObjectURL(audioBlob);
108
+ // play or download
109
+ ```
110
+
111
+ ### OpenAI Voice Options
112
+ - `alloy` — neutral, versatile
113
+ - `echo` — male, calm
114
+ - `fable` — British male, expressive
115
+ - `onyx` — deep male
116
+ - `nova` — female, warm
117
+ - `shimmer` — female, clear
118
+
119
+ ---
120
+
121
+ ## Choosing Between Engines
122
+
123
+ ```
124
+ Need best quality? → OpenAI TTS (if API key available)
125
+ Free + neural quality? → edge-tts (Microsoft Neural)
126
+ Simple + multilingual? → gTTS
127
+ No internet? → Kokoro ONNX (if models available) or pyttsx3
128
+ ```
@@ -0,0 +1,143 @@
1
+ # pyttsx3 + espeak-ng Reference
2
+
3
+ Primary TTS engine for this environment. Fully offline, 131+ languages.
4
+
5
+ ## Full API
6
+
7
+ ```python
8
+ import pyttsx3
9
+
10
+ engine = pyttsx3.init()
11
+
12
+ # Properties
13
+ engine.setProperty('rate', 145) # words per minute (100–200, default 200)
14
+ engine.setProperty('volume', 1.0) # 0.0–1.0
15
+ # Note: 'pitch' property is accepted but has no effect in espeak backend
16
+
17
+ # List all voices
18
+ voices = engine.getProperty('voices')
19
+ for v in voices:
20
+ print(v.id, v.name, v.languages)
21
+
22
+ # Set a specific voice
23
+ engine.setProperty('voice', 'gmw/en-gb-scotland')
24
+
25
+ # Speak (blocking, uses system audio — not useful in agent)
26
+ # engine.say("Hello"); engine.runAndWait()
27
+
28
+ # Save to file (USE THIS in agent context)
29
+ engine.save_to_file("Text to speak", '/tmp/output.wav')
30
+ engine.runAndWait()
31
+ ```
32
+
33
+ ## Recommended Voice IDs per Language
34
+
35
+ ```python
36
+ VOICE_MAP = {
37
+ 'en': 'gmw/en-gb-scotland', # clearest English
38
+ 'en-us': 'gmw/en-us',
39
+ 'ru': 'zle/ru',
40
+ 'de': 'gmw/de',
41
+ 'fr': 'roa/fr',
42
+ 'es': 'roa/es',
43
+ 'it': 'roa/it',
44
+ 'pt': 'roa/pt-pt',
45
+ 'nl': 'gmw/nl',
46
+ 'pl': 'zls/pl',
47
+ 'cs': 'zlw/cs',
48
+ 'zh': 'sit/cmn',
49
+ 'ja': 'jpn/ja',
50
+ 'ko': 'ko',
51
+ 'ar': 'sem/ar',
52
+ 'hi': 'inc/hi',
53
+ 'tr': 'trk/tr',
54
+ 'sv': 'gmw/sv',
55
+ 'da': 'gmw/da',
56
+ 'fi': 'urj/fi',
57
+ 'uk': 'zle/uk',
58
+ }
59
+ ```
60
+
61
+ ## Quality Optimization
62
+
63
+ ```python
64
+ def high_quality_tts(text: str, out_wav: str, lang: str = 'en', rate: int = 145):
65
+ import pyttsx3, subprocess
66
+
67
+ engine = pyttsx3.init()
68
+ engine.setProperty('rate', rate)
69
+ engine.setProperty('volume', 1.0)
70
+
71
+ voice_id = VOICE_MAP.get(lang, f'gmw/{lang}')
72
+ voices = engine.getProperty('voices')
73
+ matched = next((v for v in voices if v.id == voice_id), None)
74
+ if matched:
75
+ engine.setProperty('voice', matched.id)
76
+
77
+ engine.save_to_file(text, out_wav)
78
+ engine.runAndWait()
79
+
80
+ # Enhance with FFmpeg: resample to 44.1kHz + EQ for speech clarity
81
+ enhanced = out_wav.replace('.wav', '_enhanced.wav')
82
+ subprocess.run([
83
+ 'ffmpeg', '-i', out_wav,
84
+ '-af', 'aresample=44100,equalizer=f=3000:t=o:w=1:g=3,equalizer=f=200:t=o:w=1:g=-2,loudnorm=I=-16:TP=-1.5:LRA=11',
85
+ enhanced, '-y', '-loglevel', 'quiet'
86
+ ], check=True)
87
+
88
+ return enhanced
89
+ ```
90
+
91
+ ## Paragraph-by-Paragraph Generation
92
+
93
+ For long texts, generate in chunks to avoid engine timeouts:
94
+
95
+ ```python
96
+ def tts_long_text(paragraphs: list[str], output_mp3: str, lang: str = 'en'):
97
+ import pyttsx3, subprocess, os
98
+
99
+ chunks = []
100
+ engine = pyttsx3.init()
101
+ engine.setProperty('rate', 145)
102
+
103
+ for i, para in enumerate(paragraphs):
104
+ if not para.strip():
105
+ continue
106
+ wav = f'/tmp/chunk_{i}.wav'
107
+ engine.save_to_file(para, wav)
108
+ engine.runAndWait()
109
+ chunks.append(wav)
110
+
111
+ # Add 0.3s pause between paragraphs
112
+ concat_list = '/tmp/tts_concat.txt'
113
+ with open(concat_list, 'w') as f:
114
+ for wav in chunks:
115
+ f.write(f"file '{wav}'\n")
116
+
117
+ subprocess.run([
118
+ 'ffmpeg', '-f', 'concat', '-safe', '0', '-i', concat_list,
119
+ '-c:a', 'libmp3lame', '-b:a', '192k',
120
+ output_mp3, '-y', '-loglevel', 'quiet'
121
+ ], check=True)
122
+
123
+ # Cleanup
124
+ for wav in chunks:
125
+ os.unlink(wav)
126
+
127
+ return output_mp3
128
+ ```
129
+
130
+ ## Getting Audio Duration
131
+
132
+ ```python
133
+ import subprocess, json
134
+
135
+ def get_audio_duration(path: str) -> float:
136
+ """Returns duration in seconds."""
137
+ result = subprocess.run([
138
+ 'ffprobe', '-v', 'quiet', '-print_format', 'json',
139
+ '-show_format', path
140
+ ], capture_output=True, text=True)
141
+ data = json.loads(result.stdout)
142
+ return float(data['format']['duration'])
143
+ ```