@biggora/claude-plugins 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/.claude/settings.local.json +13 -0
  2. package/CLAUDE.md +55 -0
  3. package/LICENSE +1 -1
  4. package/README.md +208 -39
  5. package/bin/cli.js +39 -0
  6. package/package.json +30 -17
  7. package/registry/registry.json +166 -1
  8. package/registry/schema.json +10 -0
  9. package/src/commands/skills/add.js +194 -0
  10. package/src/commands/skills/list.js +52 -0
  11. package/src/commands/skills/remove.js +27 -0
  12. package/src/commands/skills/update.js +74 -0
  13. package/src/config.js +5 -0
  14. package/src/skills/codex-cli/SKILL.md +265 -0
  15. package/src/skills/commafeed-api/SKILL.md +1012 -0
  16. package/src/skills/gemini-cli/SKILL.md +379 -0
  17. package/src/skills/gemini-cli/references/commands.md +145 -0
  18. package/src/skills/gemini-cli/references/configuration.md +182 -0
  19. package/src/skills/gemini-cli/references/headless-and-scripting.md +181 -0
  20. package/src/skills/gemini-cli/references/mcp-and-extensions.md +254 -0
  21. package/src/skills/n8n-api/SKILL.md +623 -0
  22. package/src/skills/notebook-lm/SKILL.md +217 -0
  23. package/src/skills/notebook-lm/references/artifact-options.md +168 -0
  24. package/src/skills/notebook-lm/references/auth.md +58 -0
  25. package/src/skills/notebook-lm/references/workflows.md +144 -0
  26. package/src/skills/screen-recording/SKILL.md +309 -0
  27. package/src/skills/screen-recording/references/approach1-programmatic.md +311 -0
  28. package/src/skills/screen-recording/references/approach2-xvfb.md +232 -0
  29. package/src/skills/screen-recording/references/design-patterns.md +168 -0
  30. package/src/skills/test-mobile-app/SKILL.md +212 -0
  31. package/src/skills/test-mobile-app/references/report-template.md +95 -0
  32. package/src/skills/test-mobile-app/references/setup-appium.md +154 -0
  33. package/src/skills/test-mobile-app/scripts/analyze_apk.py +164 -0
  34. package/src/skills/test-mobile-app/scripts/check_environment.py +116 -0
  35. package/src/skills/test-mobile-app/scripts/generate_report.py +250 -0
  36. package/src/skills/test-mobile-app/scripts/run_tests.py +326 -0
  37. package/src/skills/test-web-ui/SKILL.md +232 -0
  38. package/src/skills/test-web-ui/references/test_case_schema.md +102 -0
  39. package/src/skills/test-web-ui/scripts/discover.py +176 -0
  40. package/src/skills/test-web-ui/scripts/generate_report.py +237 -0
  41. package/src/skills/test-web-ui/scripts/run_tests.py +296 -0
  42. package/src/skills/text-to-speech/SKILL.md +236 -0
  43. package/src/skills/text-to-speech/references/espeak-cli.md +277 -0
  44. package/src/skills/text-to-speech/references/kokoro-onnx.md +124 -0
  45. package/src/skills/text-to-speech/references/online-engines.md +128 -0
  46. package/src/skills/text-to-speech/references/pyttsx3-espeak.md +143 -0
  47. package/src/skills/tm-search/SKILL.md +240 -0
  48. package/src/skills/tm-search/references/field-guide.md +79 -0
  49. package/src/skills/tm-search/references/scraping-fallback.md +140 -0
  50. package/src/skills/tm-search/scripts/tm_search.py +375 -0
  51. package/src/skills/wp-rest-api/SKILL.md +114 -0
  52. package/src/skills/wp-rest-api/references/authentication.md +18 -0
  53. package/src/skills/wp-rest-api/references/custom-content-types.md +20 -0
  54. package/src/skills/wp-rest-api/references/discovery-and-params.md +20 -0
  55. package/src/skills/wp-rest-api/references/responses-and-fields.md +30 -0
  56. package/src/skills/wp-rest-api/references/routes-and-endpoints.md +36 -0
  57. package/src/skills/wp-rest-api/references/schema.md +22 -0
  58. package/src/skills/youtube-search/SKILL.md +412 -0
  59. package/src/skills/youtube-search/references/parsing-examples.md +159 -0
  60. package/src/skills/youtube-search/references/youtube-api-quota.md +85 -0
  61. package/src/skills/youtube-thumbnail/SKILL.md +1060 -0
  62. package/tests/commands/info.test.js +49 -0
  63. package/tests/commands/install.test.js +36 -0
  64. package/tests/commands/list.test.js +66 -0
  65. package/tests/commands/publish.test.js +182 -0
  66. package/tests/commands/search.test.js +45 -0
  67. package/tests/commands/uninstall.test.js +29 -0
  68. package/tests/commands/update.test.js +59 -0
  69. package/tests/functional/skills-lifecycle.test.js +293 -0
  70. package/tests/helpers/fixtures.js +63 -0
  71. package/tests/integration/cli.test.js +83 -0
  72. package/tests/skills/add.test.js +138 -0
  73. package/tests/skills/list.test.js +63 -0
  74. package/tests/skills/remove.test.js +38 -0
  75. package/tests/skills/update.test.js +60 -0
  76. package/tests/unit/config.test.js +31 -0
  77. package/tests/unit/registry.test.js +79 -0
  78. package/tests/unit/utils.test.js +150 -0
  79. package/tests/validation/registry-schema.test.js +112 -0
  80. package/tests/validation/skills-validation.test.js +96 -0
@@ -0,0 +1,296 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Web Tester - Test Execution Engine
4
+ Runs a structured test plan against a website using Playwright.
5
+ """
6
+
7
+ import json
8
+ import os
9
+ import sys
10
+ import time
11
+ import argparse
12
+ import base64
13
+ from datetime import datetime
14
+ from pathlib import Path
15
+
16
+ try:
17
+ from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeout
18
+ except ImportError:
19
+ print("ERROR: playwright not installed. Run: pip install playwright --break-system-packages")
20
+ sys.exit(1)
21
+
22
+
23
def encode_screenshot(path):
    """Return the file at *path* as a base64-encoded UTF-8 string.

    Used to inline screenshot images into the generated report. Any
    failure to read the file (missing path, permissions, bad argument)
    yields None instead of raising, so callers need no extra checks.
    """
    try:
        raw = Path(path).read_bytes()
    except Exception:
        return None
    return base64.b64encode(raw).decode('utf-8')
30
+
31
+
32
def run_single_test(page, test_case, screenshot_dir, base_url):
    """Execute a single test case against a live page and return a result dict.

    Navigates to ``base_url + test_case['path']``, performs the case's
    interaction steps, evaluates its assertions, and captures before/after
    screenshots along with any JS console errors emitted during the run.

    Args:
        page: Playwright Page used for navigation, steps and assertions.
        test_case: dict with optional keys 'id', 'name', 'path', 'steps',
            'assertions' (see test_case_schema.md for the step/assertion shapes).
        screenshot_dir: directory where '<id>_before.png' / '<id>_after.png'
            screenshots are written.
        base_url: site root the test case's 'path' is appended to.

    Returns:
        dict with 'status' ('PASS'/'FAIL'), 'errors', 'warnings',
        'console_errors', 'duration_ms', screenshot paths and
        assertions_total/assertions_passed counters.
    """
    tc_id = test_case.get('id', 'UNKNOWN')
    name = test_case.get('name', 'Unnamed test')
    steps = test_case.get('steps', [])
    assertions = test_case.get('assertions', [])
    target_path = test_case.get('path', '/')
    # Normalize so base_url + path never yields 'https://hostpath'
    # when a plan author writes "path": "about" instead of "/about".
    if not target_path.startswith('/'):
        target_path = '/' + target_path

    result = {
        'id': tc_id,
        'name': name,
        'status': 'PASS',
        'errors': [],
        'warnings': [],
        'console_errors': [],
        'duration_ms': 0,
        'screenshot_before': None,
        'screenshot_after': None,
        'assertions_total': len(assertions),
        'assertions_passed': 0,
    }

    start_time = time.time()

    # Collect JS console errors/warnings emitted while this test runs.
    console_errors = []

    def _on_console(msg):
        if msg.type in ('error', 'warning'):
            console_errors.append({'type': msg.type, 'text': msg.text})

    page.on('console', _on_console)

    try:
        # Navigate to target; 'domcontentloaded' first, then wait for the
        # network to settle so dynamically-rendered content is present.
        target_url = base_url.rstrip('/') + target_path
        page.goto(target_url, wait_until='domcontentloaded', timeout=15000)
        page.wait_for_load_state('networkidle', timeout=10000)
    except PlaywrightTimeout:
        result['status'] = 'FAIL'
        result['errors'].append(f'Page load timeout: {target_url}')
        result['duration_ms'] = int((time.time() - start_time) * 1000)
        return result
    except Exception as e:
        result['status'] = 'FAIL'
        result['errors'].append(f'Navigation error: {str(e)}')
        result['duration_ms'] = int((time.time() - start_time) * 1000)
        return result

    # Screenshot before interactions (best-effort: a failed screenshot
    # must not fail the test).
    before_path = os.path.join(screenshot_dir, f'{tc_id}_before.png')
    try:
        page.screenshot(path=before_path, full_page=True, timeout=5000)
        result['screenshot_before'] = before_path
    except Exception:
        pass

    # Execute steps (interactions). A failed step is recorded as a warning
    # rather than failing the test — the assertions decide pass/fail.
    for step in steps:
        action = step.get('action', '')
        selector = step.get('selector', '')
        value = step.get('value', '')
        try:
            if action == 'click':
                page.locator(selector).first.click(timeout=5000)
                page.wait_for_load_state('networkidle', timeout=8000)
            elif action == 'fill':
                page.locator(selector).first.fill(value, timeout=5000)
            elif action == 'wait_for':
                page.wait_for_selector(selector, timeout=8000)
            elif action == 'scroll':
                page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                time.sleep(0.5)
            elif action == 'hover':
                page.locator(selector).first.hover(timeout=5000)
        except Exception as e:
            result['warnings'].append(f'Step [{action} {selector}] failed: {str(e)[:100]}')

    # Screenshot after interactions (only meaningful if steps ran).
    if steps:
        after_path = os.path.join(screenshot_dir, f'{tc_id}_after.png')
        try:
            page.screenshot(path=after_path, full_page=True, timeout=5000)
            result['screenshot_after'] = after_path
        except Exception:
            pass

    # Run assertions. Any exception while evaluating an assertion counts
    # as a failure of that assertion (and the test).
    for assertion in assertions:
        atype = assertion.get('type', '')
        selector = assertion.get('selector', '')
        expected = assertion.get('expected', '')
        description = assertion.get('description', f'{atype} {selector}')

        try:
            passed = False

            if atype == 'visible':
                passed = page.locator(selector).first.is_visible()
            elif atype == 'not_visible':
                passed = not page.locator(selector).first.is_visible()
            elif atype == 'text_contains':
                text = page.locator(selector).first.text_content(timeout=5000) or ''
                passed = expected.lower() in text.lower()
            elif atype == 'title_not_empty':
                passed = bool(page.title().strip())
            elif atype == 'title_contains':
                passed = expected.lower() in page.title().lower()
            elif atype == 'count_gt':
                count = page.locator(selector).count()
                passed = count > int(expected)
            elif atype == 'count_eq':
                count = page.locator(selector).count()
                passed = count == int(expected)
            elif atype == 'url_contains':
                passed = expected in page.url
            elif atype == 'no_console_errors':
                passed = not any(e['type'] == 'error' for e in console_errors)
            elif atype == 'images_loaded':
                # Check that all images have naturalWidth > 0 (loaded);
                # data: URIs are excluded since they cannot be "broken".
                broken = page.evaluate("""
                    () => Array.from(document.images)
                        .filter(img => img.naturalWidth === 0 && img.src && !img.src.startsWith('data:'))
                        .map(img => img.src)
                """)
                passed = len(broken) == 0
                if not passed:
                    result['warnings'].append(f'Broken images: {broken[:3]}')
            elif atype == 'has_alt_text':
                missing_alt = page.evaluate("""
                    () => Array.from(document.images)
                        .filter(img => !img.alt || img.alt.trim() === '')
                        .length
                """)
                passed = missing_alt == 0
                if not passed:
                    result['warnings'].append(f'{missing_alt} images missing alt text')
            elif atype == 'clickable':
                el = page.locator(selector).first
                passed = el.is_visible() and el.is_enabled()
            elif atype == 'attribute_equals':
                attr_name = assertion.get('attribute', '')
                val = page.locator(selector).first.get_attribute(attr_name) or ''
                passed = val == expected
            else:
                result['warnings'].append(f'Unknown assertion type: {atype}')
                continue

            if passed:
                result['assertions_passed'] += 1
            else:
                result['status'] = 'FAIL'
                result['errors'].append(f'FAIL: {description}')

        except Exception as e:
            result['status'] = 'FAIL'
            result['errors'].append(f'ERROR in [{description}]: {str(e)[:120]}')

    # Collect console errors (warnings are dropped from the report list).
    result['console_errors'] = [e for e in console_errors if e['type'] == 'error']
    if result['console_errors'] and result['status'] == 'PASS':
        # Downgrade to WARNING, not FAIL — console errors are informational
        result['warnings'].append(f"{len(result['console_errors'])} JS console error(s) detected")

    result['duration_ms'] = int((time.time() - start_time) * 1000)
    return result
197
+
198
+
199
def run_tests(url, test_plan_path, output_dir):
    """Run every test case in a JSON test plan against *url*.

    Launches headless Chromium, runs each case in a fresh browser context
    (mobile viewport when the case sets "mobile": true), prints a live
    progress log plus a summary, and writes the full results to
    ``<output_dir>/results.json`` with screenshots under
    ``<output_dir>/screenshots``.

    Args:
        url: base URL the test cases' paths are resolved against.
        test_plan_path: path to a test_plan.json with 'test_cases'/'meta'.
        output_dir: directory for screenshots and results.json (created
            if missing).

    Returns:
        The results dict that was written to results.json.
    """
    os.makedirs(output_dir, exist_ok=True)
    screenshot_dir = os.path.join(output_dir, 'screenshots')
    os.makedirs(screenshot_dir, exist_ok=True)

    with open(test_plan_path, 'r', encoding='utf-8') as f:
        test_plan = json.load(f)

    test_cases = test_plan.get('test_cases', [])
    meta = test_plan.get('meta', {})

    print(f"\n{'='*60}")
    print(f" WEB TESTER — Execution Engine")
    print(f" URL: {url}")
    print(f" Tests: {len(test_cases)}")
    print(f" Started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"{'='*60}\n")

    all_results = []

    with sync_playwright() as p:
        browser = p.chromium.launch(args=[
            '--no-sandbox',
            '--disable-dev-shm-usage',
            '--disable-gpu',
        ])
        # try/finally so an unexpected error in one test case cannot leak
        # the browser process or the per-test context.
        try:
            for tc in test_cases:
                if tc.get('mobile', False):
                    context = browser.new_context(
                        viewport={'width': 390, 'height': 844},
                        is_mobile=True,
                        user_agent='Mozilla/5.0 (iPhone; CPU iPhone OS 15_0 like Mac OS X)'
                    )
                else:
                    context = browser.new_context(
                        viewport={'width': 1280, 'height': 800}
                    )

                try:
                    page = context.new_page()

                    print(f" [{tc.get('id', '?')}] {tc.get('name', '?')} ...", end=' ', flush=True)
                    result = run_single_test(page, tc, screenshot_dir, url)
                    all_results.append(result)

                    status_icon = '✓' if result['status'] == 'PASS' else '✗'
                    print(f"{status_icon} {result['status']} ({result['duration_ms']}ms)")
                    for err in result['errors']:
                        print(f"   ↳ {err}")
                finally:
                    context.close()
        finally:
            browser.close()

    # Summary (SKIP is reserved for future use; nothing sets it today).
    total = len(all_results)
    passed = sum(1 for r in all_results if r['status'] == 'PASS')
    failed = sum(1 for r in all_results if r['status'] == 'FAIL')
    skipped = sum(1 for r in all_results if r['status'] == 'SKIP')

    print(f"\n{'='*60}")
    print(f" RESULTS: {passed}/{total} passed | {failed} failed | {skipped} skipped")
    print(f" Pass rate: {int(passed/total*100) if total else 0}%")
    print(f"{'='*60}\n")

    # Save results
    output = {
        'meta': {
            **meta,
            'url': url,
            'run_at': datetime.now().isoformat(),
            'total': total,
            'passed': passed,
            'failed': failed,
            'skipped': skipped,
            'pass_rate': int(passed/total*100) if total else 0,
        },
        'results': all_results
    }

    results_path = os.path.join(output_dir, 'results.json')
    with open(results_path, 'w', encoding='utf-8') as f:
        json.dump(output, f, ensure_ascii=False, indent=2)

    print(f" Results saved: {results_path}")
    return output
287
+
288
+
289
if __name__ == '__main__':
    # CLI entry point: parse arguments, then hand off to the runner.
    cli = argparse.ArgumentParser(description='Web Tester - Execution Engine')
    cli.add_argument('--url', required=True, help='Base URL to test')
    cli.add_argument('--test-plan', required=True, help='Path to test_plan.json')
    cli.add_argument('--output', default='/home/claude/test_results', help='Output directory')
    opts = cli.parse_args()
    run_tests(opts.url, opts.test_plan, opts.output)
@@ -0,0 +1,236 @@
1
+ ---
2
+ name: text-to-speech
3
+ description: >
4
+ Converts any text to speech audio files fully autonomously, without user intervention.
5
+ Use this skill whenever a user asks to: "convert text to speech", "generate audio narration",
6
+ "create a voiceover", "make a TTS audio file", "add narration to a video", "generate MP3/WAV
7
+ from text", "create audio for a presentation", or anything involving producing spoken audio
8
+ from written text. Also trigger when user needs audio narration for product videos, demos,
9
+ slideshows, or automated workflows. Always use this skill before generating any video narration,
10
+ even if narration seems like a minor part of the task.
11
+ ---
12
+
13
+ # Text-to-Speech (TTS) Skill
14
+
15
+ Fully autonomous audio generation pipeline. No user intervention required.
16
+
17
+ ## What this skill produces
18
+
19
+ - **MP3 / WAV audio files** from any text input
20
+ - **Multilingual narration** (131+ languages via espeak-ng)
21
+ - **Video-ready voiceovers** for product demos and presentations
22
+ - **Batch narration** for automated workflows
23
+
24
+ ---
25
+
26
+ ## Engine Selection Guide
27
+
28
+ ### Confirmed Working Engines (ordered by quality)
29
+
30
+ | Engine | Quality | Speed | Languages | Notes |
31
+ |---|---|---|---|---|
32
+ | **pyttsx3 + espeak-ng** | ★★★☆ | Fast | 131+ | **PRIMARY — always available** |
33
+ | **espeak-ng CLI** | ★★★☆ | Fast | 131+ | Direct CLI, same backend |
34
+ | **flite** | ★★☆☆ | Very fast | EN only | Lightweight fallback |
35
+ | **Kokoro ONNX** | ★★★★★ | Medium | EN, ZH, JA, KO, FR, ES, HI, PT, IT, BR | High-quality neural TTS — **use if model files available** |
36
+ | **gTTS** | ★★★★☆ | Fast | 40+ | Google Neural — needs internet |
37
+ | **edge-tts** | ★★★★★ | Fast | 100+ | Microsoft Neural — needs internet |
38
+
39
+ > ⚠️ **Environment constraint**: This agent has no internet access. Use pyttsx3/espeak-ng or Kokoro (offline). For production with internet access, prefer edge-tts or gTTS.
40
+
41
+ ---
42
+
43
+ ## Quick Start
44
+
45
+ ### Standard Narration (always works)
46
+
47
+ ```python
48
+ import pyttsx3
49
+ import subprocess
50
+
51
+ def generate_tts(text: str, output_mp3: str, lang: str = "en", rate: int = 145, voice_id: str = None):
52
+ """Generate TTS audio. lang = 'en', 'ru', 'de', 'fr', 'es', 'zh', etc."""
53
+ engine = pyttsx3.init()
54
+ engine.setProperty('rate', rate) # 100–200, 145 = natural
55
+ engine.setProperty('volume', 1.0)
56
+
57
+ # Select voice by language
58
+ if voice_id:
59
+ engine.setProperty('voice', voice_id)
60
+ else:
61
+ voices = engine.getProperty('voices')
62
+ for v in voices:
63
+ if lang == 'en' and 'en-gb' in v.id.lower():
64
+ engine.setProperty('voice', v.id)
65
+ break
66
+ elif lang != 'en' and lang in v.id.lower() and 'lv' not in v.id:
67
+ engine.setProperty('voice', v.id)
68
+ break
69
+
70
+ wav_path = output_mp3.replace('.mp3', '.wav')
71
+ engine.save_to_file(text, wav_path)
72
+ engine.runAndWait()
73
+
74
+ # Convert WAV → MP3 with audio enhancement
75
+ subprocess.run([
76
+ 'ffmpeg', '-i', wav_path,
77
+ '-af', 'aresample=44100,equalizer=f=3000:t=o:w=1:g=3,equalizer=f=200:t=o:w=1:g=-2',
78
+ '-c:a', 'libmp3lame', '-b:a', '192k',
79
+ output_mp3, '-y', '-loglevel', 'quiet'
80
+ ], check=True)
81
+
82
+ return output_mp3
83
+
84
+ # Example usage
85
+ generate_tts("Welcome to our product demo.", "/tmp/narration.mp3", lang="en")
86
+ generate_tts("Добро пожаловать в демонстрацию продукта.", "/tmp/narration_ru.mp3", lang="ru")
87
+ ```
88
+
89
+ ---
90
+
91
+ ## Installation (run once per session if needed)
92
+
93
+ ```bash
94
+ pip install pyttsx3 --break-system-packages -q
95
+ apt-get install -y espeak-ng -q
96
+
97
+ # Verify
98
+ python3 -c "import pyttsx3; e=pyttsx3.init(); print('OK:', len(e.getProperty('voices')), 'voices')"
99
+ ```
100
+
101
+ ---
102
+
103
+ ## Engine Details
104
+
105
+ Read the appropriate reference file for the engine you're using:
106
+
107
+ - **`references/pyttsx3-espeak.md`** — Primary engine: full API, voice selection, SSML-like control, quality tips
108
+ - **`references/espeak-cli.md`** — Direct espeak-ng CLI usage, flags, phoneme control
109
+ - **`references/kokoro-onnx.md`** — High-quality neural TTS (offline, needs model download)
110
+ - **`references/online-engines.md`** — gTTS, edge-tts, OpenAI TTS (when internet available)
111
+
112
+ ---
113
+
114
+ ## Multi-scene Narration (for videos)
115
+
116
+ For video narration with multiple scenes, generate per-scene audio then concatenate:
117
+
118
+ ```python
119
+ import pyttsx3, subprocess, os
120
+
121
+ scenes = [
122
+ {"text": "Welcome to our AI-powered platform.", "duration_hint": 3},
123
+ {"text": "Our system automatically detects and fixes issues.", "duration_hint": 5},
124
+ {"text": "Get started today with a free trial.", "duration_hint": 3},
125
+ ]
126
+
127
+ def scenes_to_audio(scenes: list, output_path: str, lang: str = "en") -> str:
128
+ """Generate concatenated narration from scene list."""
129
+ wav_files = []
130
+ engine = pyttsx3.init()
131
+ engine.setProperty('rate', 145)
132
+
133
+ for i, scene in enumerate(scenes):
134
+ wav = f"/tmp/scene_{i}.wav"
135
+ engine.save_to_file(scene["text"], wav)
136
+ engine.runAndWait()
137
+ wav_files.append(wav)
138
+
139
+ # Build concat list for ffmpeg
140
+ concat_txt = "/tmp/concat_list.txt"
141
+ with open(concat_txt, 'w') as f:
142
+ for wav in wav_files:
143
+ f.write(f"file '{wav}'\n")
144
+
145
+ subprocess.run([
146
+ 'ffmpeg', '-f', 'concat', '-safe', '0', '-i', concat_txt,
147
+ '-c:a', 'libmp3lame', '-b:a', '192k',
148
+ output_path, '-y', '-loglevel', 'quiet'
149
+ ], check=True)
150
+
151
+ return output_path
152
+
153
+ scenes_to_audio(scenes, "/tmp/full_narration.mp3")
154
+ ```
155
+
156
+ ---
157
+
158
+ ## Language Reference (top languages)
159
+
160
+ | Code | Language | espeak-ng voice ID |
161
+ |------|----------|--------------------|
162
+ | `en` | English (GB) | `gmw/en-gb-scotland` |
163
+ | `en-us` | English (US) | `gmw/en-us` |
164
+ | `ru` | Russian | `zle/ru` |
165
+ | `de` | German | `gmw/de` |
166
+ | `fr` | French | `roa/fr` |
167
+ | `es` | Spanish | `roa/es` |
168
+ | `zh` | Chinese (Mandarin) | `sit/cmn` |
169
+ | `ar` | Arabic | `sem/ar` |
170
+ | `ja` | Japanese | `jpn/ja` |
171
+ | `pt` | Portuguese | `roa/pt` |
172
+
173
+ Full list: `espeak-ng --voices`
174
+
175
+ ---
176
+
177
+ ## FFmpeg Audio Post-processing
178
+
179
+ ```bash
180
+ # WAV → MP3 (standard)
181
+ ffmpeg -i input.wav -c:a libmp3lame -b:a 192k output.mp3 -y
182
+
183
+ # WAV → MP3 with EQ enhancement (clearer speech)
184
+ ffmpeg -i input.wav \
185
+ -af "aresample=44100,equalizer=f=3000:t=o:w=1:g=3,equalizer=f=200:t=o:w=1:g=-2" \
186
+ -c:a libmp3lame -b:a 192k output.mp3 -y
187
+
188
+ # Adjust speech speed without pitch change (atempo < 1 = slower, > 1 = faster; e.g. 0.90)
189
+ ffmpeg -i input.wav -af "atempo=0.90" output_slow.wav -y
190
+
191
+ # Add silence padding (0.5s before, 0.5s after)
192
+ ffmpeg -i input.wav -af "adelay=500|500,apad=pad_dur=0.5" output_padded.wav -y
193
+
194
+ # Normalize audio volume
195
+ ffmpeg -i input.wav -af "loudnorm=I=-16:TP=-1.5:LRA=11" output_norm.wav -y
196
+ ```
197
+
198
+ ---
199
+
200
+ ## Common Pitfalls
201
+
202
+ | Problem | Solution |
203
+ |---|---|
204
+ | `pyttsx3` hangs / no audio | Run `apt-get install -y espeak-ng` first |
205
+ | Russian text sounds robotic | Use `rate=130`, `engine.setProperty('voice', 'zle/ru')` |
206
+ | Audio too quiet | Add `-af "volume=2.0"` in ffmpeg or set `engine.setProperty('volume', 1.0)` |
207
+ | gTTS / edge-tts timeout | No internet in this environment — use pyttsx3 |
208
+ | Kokoro needs model files | Download from HuggingFace when internet is available; see `references/kokoro-onnx.md` |
209
+ | Audio/video sync off in video | Use `ffprobe` to get exact audio duration; see screen-recording skill |
210
+ | Characters not spoken (symbols) | Pre-process text: strip `*`, `#`, `>`, `|` before passing to TTS |
211
+
212
+ ---
213
+
214
+ ## Text Pre-processing
215
+
216
+ Always clean text before TTS to avoid robotic artifacts:
217
+
218
+ ```python
219
+ import re
220
+
221
+ def clean_for_tts(text: str) -> str:
222
+ """Remove markdown and symbols that confuse TTS engines."""
223
+ text = re.sub(r'#{1,6}\s*', '', text) # headers
224
+ text = re.sub(r'\*{1,2}(.+?)\*{1,2}', r'\1', text) # bold/italic
225
+ text = re.sub(r'`{1,3}[^`]*`{1,3}', '', text) # code blocks
226
+ text = re.sub(r'\[(.+?)\]\(.+?\)', r'\1', text) # links → link text
227
+ text = re.sub(r'[|>]', '', text) # table/quote chars
228
+ text = re.sub(r'\s+', ' ', text).strip()
229
+ return text
230
+ ```
231
+
232
+ ---
233
+
234
+ ## Integration with Screen Recording Skill
235
+
236
+ When used inside the screen-recording skill, replace the basic pyttsx3 call with this skill's `generate_tts()` function for better audio quality and language support. The audio pipeline is identical — just swap the TTS step.