@thierrynakoa/fire-flow 12.2.1 → 13.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (500)
  1. package/CREDITS.md +25 -0
  2. package/DOMINION-FLOW-OVERVIEW.md +182 -38
  3. package/README.md +399 -455
  4. package/TROUBLESHOOTING.md +264 -264
  5. package/agents/fire-debugger.md +54 -0
  6. package/agents/fire-executor.md +1610 -1033
  7. package/agents/fire-fact-checker.md +1 -1
  8. package/agents/fire-planner.md +85 -17
  9. package/agents/fire-project-researcher.md +1 -1
  10. package/agents/fire-researcher.md +4 -22
  11. package/agents/{fire-phoenix-analyst.md → fire-resurrection-analyst.md} +394 -394
  12. package/agents/fire-reviewer.md +552 -499
  13. package/agents/fire-verifier.md +114 -19
  14. package/bin/cli.js +18 -101
  15. package/commands/fire-0-orient.md +2 -2
  16. package/commands/fire-1a-new.md +50 -15
  17. package/commands/fire-1c-setup.md +33 -5
  18. package/commands/fire-1d-discuss.md +87 -1
  19. package/commands/fire-2-plan.md +556 -527
  20. package/commands/fire-3-execute.md +2046 -1356
  21. package/commands/fire-4-verify.md +975 -906
  22. package/commands/fire-5-handoff.md +46 -5
  23. package/commands/fire-6-resume.md +2 -31
  24. package/commands/fire-add-new-skill.md +138 -19
  25. package/commands/fire-autonomous.md +14 -2
  26. package/commands/fire-complete-milestone.md +1 -1
  27. package/commands/fire-cost.md +179 -183
  28. package/commands/fire-debug.md +1 -6
  29. package/commands/fire-loop-resume.md +2 -2
  30. package/commands/fire-loop-stop.md +1 -1
  31. package/commands/fire-loop.md +2 -15
  32. package/commands/fire-map-codebase.md +1 -1
  33. package/commands/fire-migrate-database.md +548 -0
  34. package/commands/fire-new-milestone.md +1 -1
  35. package/commands/fire-reflect.md +1 -2
  36. package/commands/fire-research.md +142 -21
  37. package/commands/{fire-phoenix.md → fire-resurrect.md} +859 -603
  38. package/commands/fire-scaffold.md +297 -0
  39. package/commands/fire-search.md +1 -2
  40. package/commands/fire-security-scan.md +483 -484
  41. package/commands/fire-setup.md +359 -0
  42. package/commands/fire-skill.md +770 -0
  43. package/commands/fire-skills-diff.md +506 -506
  44. package/commands/fire-skills-history.md +388 -388
  45. package/commands/fire-skills-rollback.md +7 -7
  46. package/commands/fire-skills-sync.md +470 -470
  47. package/commands/fire-test.md +5 -5
  48. package/commands/fire-todos.md +1 -1
  49. package/commands/fire-update.md +5 -5
  50. package/commands/fire-validate-skills.md +282 -0
  51. package/commands/fire-vuln-scan.md +492 -493
  52. package/hooks/run-hook.sh +8 -8
  53. package/hooks/run-session-end.sh +7 -7
  54. package/hooks/session-end.sh +90 -90
  55. package/hooks/session-start.sh +1 -1
  56. package/package.json +4 -24
  57. package/plugin.json +7 -7
  58. package/references/autonomy-levels.md +235 -0
  59. package/references/behavioral-directives.md +95 -3
  60. package/references/blocker-tracking.md +1 -1
  61. package/references/circuit-breaker.md +93 -2
  62. package/references/context-engineering.md +227 -9
  63. package/references/honesty-protocols.md +70 -1
  64. package/references/issue-to-pr-pipeline.md +149 -150
  65. package/references/metrics-and-trends.md +1 -2
  66. package/references/research-improvements.md +4 -108
  67. package/references/sdlc-mapping.md +73 -0
  68. package/references/state-machine.md +151 -0
  69. package/skills-library/AVAILABLE_TOOLS_REFERENCE.md +333 -0
  70. package/skills-library/SKILLS-INDEX.md +57 -558
  71. package/skills-library/SKILLS_LIBRARY_INDEX.md +532 -0
  72. package/skills-library/_general/api-patterns/api-field-name-mismatch.md +107 -0
  73. package/skills-library/_general/api-patterns/streaming-command-timeout.md +122 -0
  74. package/skills-library/_general/api-patterns/streaming-proxy-cors-bypass.md +102 -0
  75. package/skills-library/_general/automation/settings-gui-generator.md +172 -0
  76. package/skills-library/_general/database-solutions/data-type-mapping-reference.md +181 -0
  77. package/skills-library/_general/database-solutions/mysql-limit-offset-string-coercion.md +102 -0
  78. package/skills-library/_general/database-solutions/mysql-to-pg-migration.md +195 -0
  79. package/skills-library/_general/database-solutions/orm-schema-portability.md +193 -0
  80. package/skills-library/_general/database-solutions/persistent-analysis-storage.md +207 -0
  81. package/skills-library/_general/database-solutions/pg-to-mysql-schema-migration-methodology.md +190 -0
  82. package/skills-library/_general/database-solutions/sql-dialect-compatibility-matrix.md +306 -0
  83. package/skills-library/_general/database-solutions/sqlite-to-pg-migration.md +219 -0
  84. package/skills-library/_general/frontend/canvas-bubble-animation-grouping.md +270 -0
  85. package/skills-library/_general/frontend/color-token-migration.md +112 -0
  86. package/skills-library/_general/frontend/framer-motion-layoutid-grouping.md +150 -0
  87. package/skills-library/_general/frontend/pyqt6-settings-dialog.md +191 -0
  88. package/skills-library/_general/frontend/react-flow-animated-layout-switching.md +101 -0
  89. package/skills-library/_general/frontend/react-hooks-order-debugging.md +141 -0
  90. package/skills-library/_general/frontend/redux-localstorage-auth-desync.md +126 -0
  91. package/skills-library/_general/frontend/safari-csp-theme-color-debugging.md +124 -0
  92. package/skills-library/_general/frontend/safari-sw-cache-poisoning.md +138 -0
  93. package/skills-library/_general/frontend/svg-sparkline-no-charting-library.md +131 -0
  94. package/skills-library/_general/growth-marketing/oss-daily-growth-intelligence.md +224 -0
  95. package/skills-library/_general/integrations/claude-code-local-mcp-integration.md +250 -0
  96. package/skills-library/_general/integrations/mcp-composite-tool-orchestration.md +200 -0
  97. package/skills-library/_general/methodology/AGENT_SDK_STANDALONE_TOOLING.md +181 -0
  98. package/skills-library/_general/methodology/AGENT_TEAMS_GUIDE.md +169 -0
  99. package/skills-library/_general/methodology/ALAS_STATEFUL_EXECUTION.md +207 -0
  100. package/skills-library/_general/methodology/AUTO_REVIEWER_SUBAGENT.md +211 -0
  101. package/skills-library/_general/methodology/CONSISTENCY_CHECK_AMBIGUITY_GATE.md +96 -0
  102. package/skills-library/_general/methodology/DEAD_ENDS_SHELF.md +4 -4
  103. package/skills-library/_general/methodology/DISTILL_NOT_DUMP.md +108 -0
  104. package/skills-library/_general/methodology/EXECUTION_PROGRESS_MONITOR.md +157 -0
  105. package/skills-library/_general/methodology/HIERARCHICAL_REVIEW_MARS.md +122 -0
  106. package/skills-library/_general/methodology/MCP_INTER_AGENT_BRIDGE.md +207 -0
  107. package/skills-library/_general/methodology/MERMAID_WIZARD_DIAGRAMS.md +77 -0
  108. package/skills-library/_general/methodology/MISSING_DIMENSION_DETECTOR.md +89 -0
  109. package/skills-library/_general/methodology/MULTI_AGENT_COORDINATION.md +397 -0
  110. package/skills-library/_general/methodology/OBSERVATION_MASKING.md +100 -0
  111. package/skills-library/_general/methodology/PHOENIX_REBUILD_METHODOLOGY.md +82 -11
  112. package/skills-library/_general/methodology/REVIEW_BACKTRACK_PANEL.md +140 -0
  113. package/skills-library/_general/methodology/REVIEW_FIX_LOOP.md +117 -0
  114. package/skills-library/_general/methodology/VOTING_VERDICT_ARBITRATION.md +155 -0
  115. package/skills-library/_general/methodology/ZERO_FRICTION_CLI_SETUP.md +2 -2
  116. package/skills-library/_general/methodology/dead-code-activation.md +123 -0
  117. package/skills-library/_general/methodology/debug-swarm-researcher-escape-hatch.md +240 -240
  118. package/skills-library/_general/methodology/shell-autonomous-loop-fixplan.md +1 -1
  119. package/skills-library/_general/patterns-standards/GOF_DESIGN_PATTERNS_FOR_AI_AGENTS.md +5 -5
  120. package/skills-library/_general/patterns-standards/cascading-failure-diagnosis.md +119 -0
  121. package/skills-library/_general/patterns-standards/domain-specific-layout-algorithms.md +209 -0
  122. package/skills-library/_general/patterns-standards/python-desktop-app-architecture.md +399 -0
  123. package/skills-library/_general/patterns-standards/realtime-monitoring-dashboard.md +457 -0
  124. package/skills-library/_general/patterns-standards/togglable-processing-pipeline.md +169 -0
  125. package/skills-library/_general/performance/liveclock-extraction.md +112 -0
  126. package/skills-library/_general/performance/ref-based-canvas-animation.md +117 -0
  127. package/skills-library/_general/performance/use-visible-interval.md +131 -0
  128. package/skills-library/_general/testing/playwright-firefox-withcredentials-auth-issue.md +104 -0
  129. package/skills-library/_quarantine/README.md +30 -0
  130. package/skills-library/api-patterns/BROADCAST_SCHEDULER_SHARED_EXECUTE_FUNCTION.md +150 -0
  131. package/skills-library/api-patterns/ERROR_RESPONSE_STANDARDS.md +145 -0
  132. package/skills-library/api-patterns/EXPRESS_ROUTE_ORDERING_MIDDLEWARE_INTERCEPTION.md +326 -0
  133. package/skills-library/api-patterns/PAGINATION_PATTERNS.md +137 -0
  134. package/skills-library/api-patterns/PODCAST_PROGRESS_TRACKING_THREE_ROOT_CAUSES.md +277 -0
  135. package/skills-library/api-patterns/RATE_LIMITING_TOGGLE.md +155 -0
  136. package/skills-library/api-patterns/graphql-content-queries.md +708 -0
  137. package/skills-library/appointment-scheduler-design.md +423 -0
  138. package/skills-library/automation/AUTO_POPULATE_COMPLETE_GUIDE.md +631 -0
  139. package/skills-library/automation/CC_WORKFLOW_STUDIO.md +83 -0
  140. package/skills-library/automation/CLAUDE_CODE_SWARM_MODE.md +95 -0
  141. package/skills-library/automation/DAEMON_TRIGGER_FILE_IPC.md +195 -0
  142. package/skills-library/automation/scheduled-content-publishing.md +608 -0
  143. package/skills-library/awesome-workflows/Blogging-Platform-Instructions/view_commands.md +25 -0
  144. package/skills-library/awesome-workflows/CREDENTIAL-SECURITY-WORKFLOW.md +109 -0
  145. package/skills-library/awesome-workflows/DEBUGGING-WORKFLOW.md +124 -0
  146. package/skills-library/awesome-workflows/Design-Review-Workflow/README.md +31 -0
  147. package/skills-library/awesome-workflows/Design-Review-Workflow/design-principles-example.md +129 -0
  148. package/skills-library/awesome-workflows/Design-Review-Workflow/design-review-agent.md +107 -0
  149. package/skills-library/awesome-workflows/Design-Review-Workflow/design-review-claude-md-snippet.md +24 -0
  150. package/skills-library/awesome-workflows/Design-Review-Workflow/design-review-slash-command.md +38 -0
  151. package/skills-library/awesome-workflows/PARALLEL-RESEARCH-WORKFLOW.md +89 -0
  152. package/skills-library/awesome-workflows/PHASE-EXECUTION-WORKFLOW.md +97 -0
  153. package/skills-library/awesome-workflows/SESSION-HANDOFF-WORKFLOW.md +116 -0
  154. package/skills-library/cms-patterns/content-branch-preview.md +515 -0
  155. package/skills-library/cms-patterns/inline-visual-editing.md +666 -0
  156. package/skills-library/cms-patterns/mdx-component-content.md +649 -0
  157. package/skills-library/cms-patterns/media-manager-abstraction.md +827 -0
  158. package/skills-library/cms-patterns/schema-driven-form-generator.md +838 -0
  159. package/skills-library/complexity-metrics/complexity-divider.md +707 -0
  160. package/skills-library/complexity-metrics/work-with-complexity.md +193 -0
  161. package/skills-library/creative-multimedia/animation-stack-guide.md +577 -0
  162. package/skills-library/creative-multimedia/audio-enhancement-pipeline.md +625 -0
  163. package/skills-library/creative-multimedia/content-repurposing-pipeline.md +1146 -0
  164. package/skills-library/creative-multimedia/data-visualization-generator.md +862 -0
  165. package/skills-library/creative-multimedia/doc-to-podcast-pipeline.md +2184 -0
  166. package/skills-library/creative-multimedia/ffmpeg-command-generator.md +405 -0
  167. package/skills-library/creative-multimedia/image-optimization-pipeline.md +605 -0
  168. package/skills-library/creative-multimedia/multi-format-content-generator.md +1759 -0
  169. package/skills-library/creative-multimedia/og-image-generator.md +635 -0
  170. package/skills-library/creative-multimedia/podcast-audio-composition.md +1355 -0
  171. package/skills-library/creative-multimedia/podcast-quality-evaluation.md +1452 -0
  172. package/skills-library/creative-multimedia/podcast-script-generation.md +1841 -0
  173. package/skills-library/creative-multimedia/svg-generation.md +750 -0
  174. package/skills-library/creative-multimedia/text-to-speech-provider-selector.md +1414 -0
  175. package/skills-library/creative-multimedia/transcription-pipeline-selector.md +677 -0
  176. package/skills-library/creative-multimedia/video-streaming-setup.md +559 -0
  177. package/skills-library/database-solutions/AI_RESPONSE_DATABASE_CACHING.md +520 -0
  178. package/skills-library/database-solutions/CONDITIONAL_SQL_MIGRATION_PATTERN.md +119 -0
  179. package/skills-library/database-solutions/DATABASE_COLUMN_NAME_MISMATCH.md +393 -0
  180. package/skills-library/database-solutions/DATABASE_SCHEMA.md +394 -0
  181. package/skills-library/database-solutions/DATABASE_SCHEMA_VERIFICATION_GUIDE.md +348 -0
  182. package/skills-library/database-solutions/DATABASE_STRATEGY.md +71 -0
  183. package/skills-library/database-solutions/ES_MODULE_SEED_SCRIPT_PATTERN.md +52 -0
  184. package/skills-library/database-solutions/MIGRATION_GUIDE.md +3 -0
  185. package/skills-library/database-solutions/PLPGSQL_VARIABLE_CONFLICT_FIX.md +208 -0
  186. package/skills-library/database-solutions/POSTGRESQL_JSONB_DOUBLE_STRINGIFY_FIX.md +245 -0
  187. package/skills-library/database-solutions/POSTGRESQL_LICENSE_TABLE_DESIGN.md +393 -0
  188. package/skills-library/database-solutions/POSTGRESQL_UUID_DOCUMENT_RAG_DUAL_SCOPE.md +732 -0
  189. package/skills-library/database-solutions/POSTGRES_SQL_TEMPLATE_BINDING_ERROR.md +240 -0
  190. package/skills-library/database-solutions/PRISMA_DB_PUSH_DATA_LOSS_PREVENTION.md +141 -0
  191. package/skills-library/database-solutions/PRODUCTION_QUERY_OPTIMIZATION_RESTART_FIX.md +389 -0
  192. package/skills-library/database-solutions/RLS_SECURITY_GUIDE.md +107 -0
  193. package/skills-library/database-solutions/SCHEMA_ENHANCEMENTS_GUIDE.md +373 -0
  194. package/skills-library/database-solutions/SCHEMA_MIGRATION_GUIDE.md +368 -0
  195. package/skills-library/database-solutions/SCHEMA_VERIFICATION_QUICK_REFERENCE.md +104 -0
  196. package/skills-library/database-solutions/ai-erd-generator.md +1213 -0
  197. package/skills-library/database-solutions/content-publishing-states.md +631 -0
  198. package/skills-library/database-solutions/database-schema-designer.md +522 -0
  199. package/skills-library/database-solutions/er-diagram-components.md +569 -0
  200. package/skills-library/database-solutions/er-to-ddl-mapping.md +1405 -0
  201. package/skills-library/database-solutions/erd-creator-textbook-research.md +433 -0
  202. package/skills-library/database-solutions/erd-react-flow-architecture.md +1965 -0
  203. package/skills-library/database-solutions/mariadb-aggregate-function-replacement.md +145 -0
  204. package/skills-library/database-solutions/normalization-validator.md +778 -0
  205. package/skills-library/database-solutions/postgres-full-text-search-content.md +494 -0
  206. package/skills-library/database-solutions/postgresql-to-mysql-runtime-translation.md +286 -0
  207. package/skills-library/database-solutions/regex-alternation-ordering-sql-types.md +92 -0
  208. package/skills-library/database-solutions/reserved-word-context-aware-quoting.md +142 -0
  209. package/skills-library/database-solutions/sql-ddl-generator.md +756 -0
  210. package/skills-library/database-solutions/supabase-connection-pooler-fix.md +102 -0
  211. package/skills-library/deployment-security/CPANEL_NODE_DEPLOYMENT.md +166 -0
  212. package/skills-library/deployment-security/DEPLOYMENT.md +275 -0
  213. package/skills-library/deployment-security/DEPLOYMENT_CHECKLIST.md +363 -0
  214. package/skills-library/deployment-security/DEPLOYMENT_PLAN.md +669 -0
  215. package/skills-library/deployment-security/KNEX_DATABASE_ABSTRACTION.md +444 -0
  216. package/skills-library/deployment-security/LICENSE_KEY_SYSTEM.md +206 -0
  217. package/skills-library/deployment-security/NODE18_DEPENDENCY_COMPATIBILITY.md +284 -0
  218. package/skills-library/deployment-security/PHP_INSTALLER_WIZARD_GUIDE.md +315 -0
  219. package/skills-library/deployment-security/PM2_ENVIRONMENT_VARIABLE_CACHING.md +256 -0
  220. package/skills-library/deployment-security/PM2_MEMORY_EXHAUSTION_FIX.md +370 -0
  221. package/skills-library/deployment-security/PRODUCTION_DEPLOYMENT_GUIDE.md +592 -0
  222. package/skills-library/deployment-security/PRODUCTION_HARDENING_DOCUMENTATION.md +307 -0
  223. package/skills-library/deployment-security/PRODUCTION_RECOVERY_CHERRY_PICK_PATTERN.md +202 -0
  224. package/skills-library/deployment-security/PYINSTALLER_CUDA_WHISPER_BUNDLING.md +236 -0
  225. package/skills-library/deployment-security/SECURITY.md +41 -0
  226. package/skills-library/deployment-security/SMTP_SSL_HOSTNAME_MISMATCH_SHARED_HOSTING.md +220 -0
  227. package/skills-library/deployment-security/SPA_SEO_OPTIMIZATION_CPANEL.md +200 -0
  228. package/skills-library/deployment-security/SUPABASE_EDGE_FUNCTIONS.md +338 -0
  229. package/skills-library/deployment-security/VERCEL_GITHUB_DEPLOYMENT_GUIDE.md +858 -0
  230. package/skills-library/deployment-security/VPS_DEPLOYMENT_READINESS.md +356 -0
  231. package/skills-library/deployment-security/deployment-changes-not-applying.md +241 -0
  232. package/skills-library/deployment-security/env-file-management-production-local.md +203 -0
  233. package/skills-library/deployment-security/express-secure-file-downloads.md +413 -0
  234. package/skills-library/deployment-security/react-production-deployment-desktop-guide.md +2011 -0
  235. package/skills-library/deployment-security/self-hosted-supabase-coolify-guide.md +1684 -0
  236. package/skills-library/deployment-security/unique-features-ai-strategy-plaid-security.md +1613 -0
  237. package/skills-library/deployment-security/vps-deployment.md +135 -0
  238. package/skills-library/document-processing/WORD_EXPORT_MARKDOWN_FORMATTING.md +482 -0
  239. package/skills-library/document-processing/document-ai-landingai-integration.md +677 -0
  240. package/skills-library/document-processing/express-secure-file-downloads-mern.md +413 -0
  241. package/skills-library/document-processing/express-secure-file-downloads.md +413 -0
  242. package/skills-library/document-processing/md-to-word-converter.md +318 -0
  243. package/skills-library/document-processing/pdf-forms-integration/README.md +101 -0
  244. package/skills-library/document-processing/pdf-forms-integration/SKILL.md +662 -0
  245. package/skills-library/ecommerce/ADMIN_PRODUCTS_GUIDE.md +428 -0
  246. package/skills-library/ecommerce/ECOMMERCE_API_REFERENCE.md +776 -0
  247. package/skills-library/ecommerce/ECOMMERCE_COMPLETION_SUMMARY.md +673 -0
  248. package/skills-library/ecommerce/ECOMMERCE_IMPLEMENTATION_GUIDE.md +729 -0
  249. package/skills-library/ecommerce/ECOMMERCE_QUICK_REFERENCE.md +521 -0
  250. package/skills-library/ecommerce/ECOMMERCE_TESTING_CHECKLIST.md +565 -0
  251. package/skills-library/ecommerce/ECOMMERCE_WORKFLOW_GUIDE.md +1059 -0
  252. package/skills-library/ecommerce/PRODUCT_CREATION_EXPANDED.md +522 -0
  253. package/skills-library/ecommerce/agentic-commerce-protocol.md +203 -0
  254. package/skills-library/ecommerce/cart-abandonment-recovery.md +236 -0
  255. package/skills-library/ecommerce/cart-architecture-patterns.md +300 -0
  256. package/skills-library/ecommerce/cart-item-count-indicator.md +264 -0
  257. package/skills-library/ecommerce/checkout-ux-conversion.md +227 -0
  258. package/skills-library/ecommerce/composable-commerce-selection.md +166 -0
  259. package/skills-library/ecommerce/ecommerce-analytics-patterns.md +167 -0
  260. package/skills-library/ecommerce/fraud-detection-patterns.md +179 -0
  261. package/skills-library/ecommerce/inventory-stock-management.md +270 -0
  262. package/skills-library/ecommerce/order-saga-state-machine.md +336 -0
  263. package/skills-library/ecommerce/payment-provider-abstraction.md +245 -0
  264. package/skills-library/ecommerce/pci-compliance-checklist.md +192 -0
  265. package/skills-library/ecommerce/refund-chargeback-handling.md +177 -0
  266. package/skills-library/ecommerce/shipping-carrier-integration.md +218 -0
  267. package/skills-library/ecommerce/webhook-idempotency-patterns.md +253 -0
  268. package/skills-library/excalidraw-diagrams/.github/workflows/ci.yml +558 -0
  269. package/skills-library/excalidraw-diagrams/.github/workflows/prompt-gallery.yml +448 -0
  270. package/skills-library/excalidraw-diagrams/.github/workflows/release.yml +42 -0
  271. package/skills-library/excalidraw-diagrams/.github/workflows/test-reusable-ci.yml +25 -0
  272. package/skills-library/excalidraw-diagrams/CLAUDE.md +57 -0
  273. package/skills-library/excalidraw-diagrams/LICENSE +21 -0
  274. package/skills-library/excalidraw-diagrams/README.md +178 -0
  275. package/skills-library/excalidraw-diagrams/SKILL.md +715 -0
  276. package/skills-library/form-solutions/BUTTON_TYPE_FORM_SUBMISSION.md +336 -0
  277. package/skills-library/form-solutions/FILLABLE_PDF_IMPLEMENTATION.md +226 -0
  278. package/skills-library/form-solutions/SURVEYJS_QUESTIONNAIRE_SYSTEM.md +367 -0
  279. package/skills-library/form-solutions/tiptap-minimal-setup.md +690 -0
  280. package/skills-library/frontend/scholarly-classification-bubble-map.md +149 -0
  281. package/skills-library/infrastructure/ci-cd-pipeline-builder.md +517 -0
  282. package/skills-library/infrastructure/observability-designer.md +264 -0
  283. package/skills-library/infrastructure/performance-profiler.md +621 -0
  284. package/skills-library/installer-wizard-patterns.md +249 -0
  285. package/skills-library/integrations/CLAUDE_CODE_TOKEN_ANALYTICS.md +160 -0
  286. package/skills-library/integrations/CONFIGURABLE_AI_PROVIDER_SELECTION.md +728 -0
  287. package/skills-library/integrations/SOCKET_IO_BROADCAST_ALL_VS_ROOM.md +141 -0
  288. package/skills-library/integrations/VIRTUAL_MEETINGS_IMPLEMENTATION.md +374 -0
  289. package/skills-library/integrations/WORDPRESS_LEARNDASH_DATA_RECOVERY.md +53 -0
  290. package/skills-library/integrations/YOUTUBE_API_SETUP.md +141 -0
  291. package/skills-library/integrations/YOUTUBE_BOOKMARKING_EXPLANATION.md +252 -0
  292. package/skills-library/integrations/YOUTUBE_BOOKMARKING_SOLUTION.md +268 -0
  293. package/skills-library/integrations/YOUTUBE_OAUTH_SETUP_GUIDE.md +200 -0
  294. package/skills-library/integrations/YOUTUBE_VIDEO_FIX_COMPLETE.md +192 -0
  295. package/skills-library/integrations/ai-ml/GEMINI_AI_RAG_PIPELINE_COMPLETE_GUIDE.md +195 -0
  296. package/skills-library/integrations/ai-ml/GEMINI_IMAGE_GENERATION_SETUP.md +64 -0
  297. package/skills-library/integrations/cloudflare/cloudflare-turnstile-debugging.md +202 -0
  298. package/skills-library/integrations/cloudflare/cloudflare-turnstile-implementation.md +476 -0
  299. package/skills-library/integrations/cloudflare-turnstile-debugging.md +202 -0
  300. package/skills-library/integrations/cloudflare-turnstile-implementation.md +476 -0
  301. package/skills-library/integrations/ghost-creator-monetization-pattern.md +454 -0
  302. package/skills-library/integrations/headless-cms-architecture.md +484 -0
  303. package/skills-library/integrations/headless-cms-stack-selection.md +183 -0
  304. package/skills-library/integrations/payload-cms-patterns.md +674 -0
  305. package/skills-library/integrations/realtimestt-openwakeword-cuda-windows.md +229 -0
  306. package/skills-library/integrations/rss-podcast-integration.md +300 -0
  307. package/skills-library/integrations/wordpress/WORDPRESS_LEARNDASH_DATA_RECOVERY.md +53 -0
  308. package/skills-library/integrations/youtube/YOUTUBE_API_SETUP.md +141 -0
  309. package/skills-library/integrations/youtube/YOUTUBE_BOOKMARKING_EXPLANATION.md +252 -0
  310. package/skills-library/integrations/youtube/YOUTUBE_BOOKMARKING_SOLUTION.md +268 -0
  311. package/skills-library/integrations/youtube/YOUTUBE_OAUTH_SETUP_GUIDE.md +200 -0
  312. package/skills-library/integrations/youtube/YOUTUBE_VIDEO_FIX_COMPLETE.md +192 -0
  313. package/skills-library/marketing/campaign-analytics.md +97 -0
  314. package/skills-library/marketing/content-creator.md +105 -0
  315. package/skills-library/marketing/marketing-strategy-pmm.md +94 -0
  316. package/skills-library/marketing/social-media-analyzer.md +81 -0
  317. package/skills-library/methodology/ADVANCED_ORCHESTRATION_PATTERNS.md +401 -0
  318. package/skills-library/methodology/AGENT_SELF_IMPROVEMENT_LOOP.md +179 -0
  319. package/skills-library/methodology/BREATH_BASED_PARALLEL_EXECUTION.md +1 -1
  320. package/skills-library/methodology/CLEANSING_CYCLE.md +358 -0
  321. package/skills-library/methodology/CONFIDENCE_ANNOTATION_PATTERN.md +143 -0
  322. package/skills-library/methodology/CRITICAL_PATTERNS_DOCUMENTATION_COMPLETE.md +204 -0
  323. package/skills-library/methodology/DELIVERABLES_SUMMARY.md +341 -0
  324. package/skills-library/methodology/DIFFICULTY_AWARE_AGENT_ROUTING.md +252 -0
  325. package/skills-library/methodology/EVOLUTIONARY_SKILL_SYNTHESIS.md +219 -0
  326. package/skills-library/methodology/GLOMERULUS_DECISION_GATE.md +223 -0
  327. package/skills-library/methodology/HIBERNATION_SYSTEM.md +231 -0
  328. package/skills-library/methodology/INSTRUMENTATION_OVER_RESTRICTION.md +192 -0
  329. package/skills-library/methodology/MASTER_COMPLETION_SUMMARY.md +444 -0
  330. package/skills-library/methodology/MASTER_SESSION_COMPLETION.md +743 -0
  331. package/skills-library/methodology/MERN_QUICK_REFERENCE.md +358 -0
  332. package/skills-library/methodology/ORGAN_AGENT_MAPPING.md +177 -0
  333. package/skills-library/methodology/PARALLEL_WAVE_BASED_REFACTORING.md +440 -0
  334. package/skills-library/methodology/QUICK_REFERENCE.md +358 -0
  335. package/skills-library/methodology/SDFT_ONPOLICY_SELF_DISTILLATION.md +186 -0
  336. package/skills-library/methodology/SELF_QUESTIONING_TASK_GENERATION.md +270 -0
  337. package/skills-library/methodology/SESSION_COMPLETION_SUMMARY.md +304 -0
  338. package/skills-library/methodology/SESSION_SUMMARY.md +432 -0
  339. package/skills-library/methodology/WARRIOR_WORKFLOW_DEBUGGING_PROTOCOL.md +252 -0
  340. package/skills-library/methodology/tech-debt-tracker.md +570 -0
  341. package/skills-library/parallel-debug/SKILL.md +60 -0
  342. package/skills-library/patterns-standards/API_PATTERN_FIX_SUMMARY.md +236 -0
  343. package/skills-library/patterns-standards/BATCH_OPERATIONS_WITH_PROGRESS_MODAL.md +362 -0
  344. package/skills-library/patterns-standards/CRITICAL_CODING_PATTERNS.md +639 -0
  345. package/skills-library/patterns-standards/DARK_MODE_MODAL_VISIBILITY.md +258 -0
  346. package/skills-library/patterns-standards/ERROR_RESILIENCE_IMPLEMENTATION.md +375 -0
  347. package/skills-library/patterns-standards/ES_MODULE_IMPORT_HOISTING_DOTENV.md +298 -0
  348. package/skills-library/patterns-standards/NESTED_BACKDROP_FILTER_CSS_ARTIFACT_FIX.md +76 -0
  349. package/skills-library/patterns-standards/ORDERED_DETECTOR_PIPELINE_GRACEFUL_FALLBACK.md +333 -0
  350. package/skills-library/patterns-standards/PHASE_IMPORT_ERROR_DEBUGGING.md +271 -0
  351. package/skills-library/patterns-standards/PYNPUT_GLOBAL_HOTKEY_VK_MATCHING.md +252 -0
  352. package/skills-library/patterns-standards/REACT_USEEFFECT_CASCADE_RESET_FIX.md +132 -0
  353. package/skills-library/patterns-standards/SUBMENU_HOVER_DROPDOWN_PATTERN.md +225 -0
  354. package/skills-library/patterns-standards/TAILWIND_TEXT_VISIBILITY_OVERRIDE.md +322 -0
  355. package/skills-library/patterns-standards/THEME_AWARE_CSS_VARIABLES_PATTERN.md +209 -0
  356. package/skills-library/patterns-standards/THEME_USER_OBJECT_PROPERTY_NAMING.md +194 -0
  357. package/skills-library/patterns-standards/TOOLTIP_BLOCKING_CLICKS_FIX.md +267 -0
  358. package/skills-library/patterns-standards/claude-code-plugin-structure.md +235 -0
  359. package/skills-library/patterns-standards/react-i18next-setup.md +429 -0
  360. package/skills-library/patterns-standards/thesys-c1-generative-ui-integration.md +967 -0
  361. package/skills-library/plugin-development/CLAUDE_CODE_COMMAND_REGISTRATION_SILENT_FAILURE.md +315 -0
  362. package/skills-library/plugin-development/plugin-command-namespace-vs-global.md +390 -0
  363. package/skills-library/plugin-development/plugin-doc-auto-generation.md +172 -0
  364. package/skills-library/security/GITHUB_REPO_SECURITY_AUDIT.md +115 -0
  365. package/skills-library/security/admin-deletion-safety.md +396 -0
  366. package/skills-library/security/application-vuln-patterns.md +477 -0
  367. package/skills-library/security/env-secrets-manager.md +686 -0
  368. package/skills-library/security/secure-ai-application-templates.md +347 -0
  369. package/skills-library/security/sql-injection-prevention-postgresjs.md +151 -0
  370. package/skills-library/supabase-connection-pooler-fix.md +102 -0
  371. package/skills-library/system-context/POWERSHELL_BASH_INTEROP.md +82 -0
  372. package/skills-library/system-context/SERVICE_LIFECYCLE_MANAGEMENT.md +119 -0
  373. package/skills-library/system-context/SKILL.md +40 -0
  374. package/skills-library/system-context/WINDOWS_DEV_ENVIRONMENT.md +73 -0
  375. package/skills-library/testing/E2E_PLAYWRIGHT_PATTERNS.md +99 -0
  376. package/skills-library/testing/INTEGRATION_TEST_STRATEGY.md +82 -0
  377. package/skills-library/testing/RED_GREEN_BUGFIX_GATE.md +203 -0
  378. package/skills-library/testing/TEST_DATA_MANAGEMENT.md +69 -0
  379. package/skills-library/testing/VITEST_UNIT_TEST_PATTERNS.md +75 -0
  380. package/skills-library/testing/playwright-api-security-tests.md +202 -0
  381. package/skills-library/toolbox/SKILL.md +84 -0
  382. package/skills-library/toolbox/code-graph-and-web-scraping-mcps.md +237 -0
  383. package/skills-library/ui-ux-pro-max/ACCESSIBILITY_ESSENTIALS.md +115 -0
  384. package/skills-library/ui-ux-pro-max/DESIGN_SYSTEM_SCAFFOLDING.md +133 -0
  385. package/skills-library/ui-ux-pro-max/RESPONSIVE_LAYOUT_PATTERNS.md +119 -0
  386. package/skills-library/ui-ux-pro-max/SKILL.md +386 -0
  387. package/skills-library/ui-ux-pro-max/data/charts.csv +26 -0
  388. package/skills-library/ui-ux-pro-max/data/colors.csv +97 -0
  389. package/skills-library/ui-ux-pro-max/data/icons.csv +101 -0
  390. package/skills-library/ui-ux-pro-max/data/landing.csv +31 -0
  391. package/skills-library/ui-ux-pro-max/data/products.csv +97 -0
  392. package/skills-library/ui-ux-pro-max/data/react-performance.csv +45 -0
  393. package/skills-library/ui-ux-pro-max/data/stacks/astro.csv +54 -0
  394. package/skills-library/ui-ux-pro-max/data/stacks/flutter.csv +53 -0
  395. package/skills-library/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -0
  396. package/skills-library/ui-ux-pro-max/data/stacks/jetpack-compose.csv +53 -0
  397. package/skills-library/ui-ux-pro-max/data/stacks/nextjs.csv +53 -0
  398. package/skills-library/ui-ux-pro-max/data/stacks/nuxt-ui.csv +51 -0
  399. package/skills-library/ui-ux-pro-max/data/stacks/nuxtjs.csv +59 -0
  400. package/skills-library/ui-ux-pro-max/data/stacks/react-native.csv +52 -0
  401. package/skills-library/ui-ux-pro-max/data/stacks/react.csv +54 -0
  402. package/skills-library/ui-ux-pro-max/data/stacks/shadcn.csv +61 -0
  403. package/skills-library/ui-ux-pro-max/data/stacks/svelte.csv +54 -0
  404. package/skills-library/ui-ux-pro-max/data/stacks/swiftui.csv +51 -0
  405. package/skills-library/ui-ux-pro-max/data/stacks/vue.csv +50 -0
  406. package/skills-library/ui-ux-pro-max/data/styles.csv +68 -0
  407. package/skills-library/ui-ux-pro-max/data/typography.csv +58 -0
  408. package/skills-library/ui-ux-pro-max/data/ui-reasoning.csv +101 -0
  409. package/skills-library/ui-ux-pro-max/data/ux-guidelines.csv +100 -0
  410. package/skills-library/ui-ux-pro-max/data/web-interface.csv +31 -0
  411. package/skills-library/wordpress-style-theme-components.md +1526 -0
  412. package/templates/ASSUMPTIONS.md +1 -1
  413. package/templates/DECISION_LOG.md +0 -1
  414. package/templates/phase-prompt.md +1 -1
  415. package/templates/phoenix-comparison.md +6 -6
  416. package/templates/skill-api-integration.md +106 -0
  417. package/templates/skill-architecture-pattern.md +92 -0
  418. package/templates/skill-debug-pattern.md +98 -0
  419. package/templates/skill-devops-recipe.md +107 -0
  420. package/templates/skill-general.md +65 -0
  421. package/templates/skill-ui-component.md +113 -0
  422. package/tools/uat-runner.py +179 -0
  423. package/version.json +7 -3
  424. package/workflows/handoff-session.md +2 -2
  425. package/workflows/new-project.md +2 -2
  426. package/workflows/plan-phase.md +1 -1
  427. package/.claude-plugin/plugin.json +0 -64
  428. package/skills-library/_general/methodology/LIVE_BREADCRUMB_PROTOCOL.md +0 -242
  429. package/skills-library/_general/methodology/llm-judge-memory-crud.md +0 -241
  430. package/skills-library/methodology/REFLEXION_MEMORY_PATTERN.md +0 -183
  431. package/skills-library/methodology/RESEARCH_BACKED_WORKFLOW_UPGRADE.md +0 -263
  432. package/skills-library/methodology/SABBATH_REST_PATTERN.md +0 -267
  433. package/skills-library/methodology/STONE_AND_SCAFFOLD.md +0 -220
  434. package/skills-library/specialists/api-architecture/api-designer.md +0 -49
  435. package/skills-library/specialists/api-architecture/graphql-architect.md +0 -49
  436. package/skills-library/specialists/api-architecture/mcp-developer.md +0 -51
  437. package/skills-library/specialists/api-architecture/microservices-architect.md +0 -50
  438. package/skills-library/specialists/api-architecture/websocket-engineer.md +0 -48
  439. package/skills-library/specialists/backend/django-expert.md +0 -52
  440. package/skills-library/specialists/backend/fastapi-expert.md +0 -52
  441. package/skills-library/specialists/backend/laravel-specialist.md +0 -52
  442. package/skills-library/specialists/backend/nestjs-expert.md +0 -51
  443. package/skills-library/specialists/backend/rails-expert.md +0 -53
  444. package/skills-library/specialists/backend/spring-boot-engineer.md +0 -56
  445. package/skills-library/specialists/data-ml/fine-tuning-expert.md +0 -48
  446. package/skills-library/specialists/data-ml/ml-pipeline.md +0 -47
  447. package/skills-library/specialists/data-ml/pandas-pro.md +0 -47
  448. package/skills-library/specialists/data-ml/rag-architect.md +0 -51
  449. package/skills-library/specialists/data-ml/spark-engineer.md +0 -47
  450. package/skills-library/specialists/frontend/angular-architect.md +0 -52
  451. package/skills-library/specialists/frontend/flutter-expert.md +0 -51
  452. package/skills-library/specialists/frontend/nextjs-developer.md +0 -54
  453. package/skills-library/specialists/frontend/react-native-expert.md +0 -50
  454. package/skills-library/specialists/frontend/vue-expert.md +0 -51
  455. package/skills-library/specialists/infrastructure/chaos-engineer.md +0 -74
  456. package/skills-library/specialists/infrastructure/cloud-architect.md +0 -70
  457. package/skills-library/specialists/infrastructure/database-optimizer.md +0 -64
  458. package/skills-library/specialists/infrastructure/devops-engineer.md +0 -70
  459. package/skills-library/specialists/infrastructure/kubernetes-specialist.md +0 -52
  460. package/skills-library/specialists/infrastructure/monitoring-expert.md +0 -70
  461. package/skills-library/specialists/infrastructure/sre-engineer.md +0 -70
  462. package/skills-library/specialists/infrastructure/terraform-engineer.md +0 -51
  463. package/skills-library/specialists/languages/cpp-pro.md +0 -74
  464. package/skills-library/specialists/languages/csharp-developer.md +0 -69
  465. package/skills-library/specialists/languages/dotnet-core-expert.md +0 -54
  466. package/skills-library/specialists/languages/golang-pro.md +0 -51
  467. package/skills-library/specialists/languages/java-architect.md +0 -49
  468. package/skills-library/specialists/languages/javascript-pro.md +0 -68
  469. package/skills-library/specialists/languages/kotlin-specialist.md +0 -68
  470. package/skills-library/specialists/languages/php-pro.md +0 -49
  471. package/skills-library/specialists/languages/python-pro.md +0 -52
  472. package/skills-library/specialists/languages/react-expert.md +0 -51
  473. package/skills-library/specialists/languages/rust-engineer.md +0 -50
  474. package/skills-library/specialists/languages/sql-pro.md +0 -56
  475. package/skills-library/specialists/languages/swift-expert.md +0 -69
  476. package/skills-library/specialists/languages/typescript-pro.md +0 -51
  477. package/skills-library/specialists/platform/atlassian-mcp.md +0 -52
  478. package/skills-library/specialists/platform/embedded-systems.md +0 -53
  479. package/skills-library/specialists/platform/game-developer.md +0 -53
  480. package/skills-library/specialists/platform/salesforce-developer.md +0 -53
  481. package/skills-library/specialists/platform/shopify-expert.md +0 -49
  482. package/skills-library/specialists/platform/wordpress-pro.md +0 -49
  483. package/skills-library/specialists/quality/code-documenter.md +0 -51
  484. package/skills-library/specialists/quality/code-reviewer.md +0 -67
  485. package/skills-library/specialists/quality/debugging-wizard.md +0 -51
  486. package/skills-library/specialists/quality/fullstack-guardian.md +0 -51
  487. package/skills-library/specialists/quality/legacy-modernizer.md +0 -50
  488. package/skills-library/specialists/quality/playwright-expert.md +0 -65
  489. package/skills-library/specialists/quality/spec-miner.md +0 -56
  490. package/skills-library/specialists/quality/test-master.md +0 -65
  491. package/skills-library/specialists/security/secure-code-guardian.md +0 -55
  492. package/skills-library/specialists/security/security-reviewer.md +0 -53
  493. package/skills-library/specialists/workflow/architecture-designer.md +0 -53
  494. package/skills-library/specialists/workflow/cli-developer.md +0 -70
  495. package/skills-library/specialists/workflow/feature-forge.md +0 -65
  496. package/skills-library/specialists/workflow/prompt-engineer.md +0 -54
  497. package/skills-library/specialists/workflow/the-fool.md +0 -62
  498. /package/skills-library/{performance → _general/performance}/cache-augmented-generation.md +0 -0
  499. /package/skills-library/{debugging → parallel-debug}/FAILURE_TAXONOMY_CLASSIFICATION.md +0 -0
  500. /package/skills-library/{debugging → parallel-debug}/THREE_AGENT_HYPOTHESIS_DEBUGGING.md +0 -0
@@ -0,0 +1,1414 @@
1
+ ---
2
+ name: text-to-speech-provider-selector
3
+ category: creative-multimedia
4
+ version: 1.0.0
5
+ contributed: 2026-03-10
6
+ contributor: dominion-flow-research
7
+ last_updated: 2026-03-10
8
+ tags: [tts, text-to-speech, audio, podcast, voice-synthesis, elevenlabs, orpheus, chatterbox, google-cloud-tts, bark]
9
+ difficulty: medium
10
+ ---
11
+
12
+ # Text-to-Speech Provider Selector
13
+ ## Description
14
+
15
+ Select the right Text-to-Speech provider for AI-powered audio generation — podcasts, online courses, narration, sermon recordings, and interactive voice applications. This skill provides a structured comparison of open-source and cloud TTS providers, complete integration code, and cost modeling to help you make the right choice without trial-and-error.
16
+
17
+ ## When to Use
18
+
19
+ - Building a podcast generation pipeline (AI hosts, course narration)
20
+ - Adding voice output to any application (chatbots, accessibility, notifications)
21
+ - Choosing between local/private TTS and cloud APIs
22
+ - Generating multi-speaker audio content (dialogues, interviews)
23
+ - Estimating TTS costs for production workloads
24
+ - Need emotion control or voice cloning in generated speech
25
+
26
+ ---
27
+
28
+ ## 1. Provider Comparison Matrix
29
+
30
+ ### Open-Source (Local) Providers
31
+
32
+ #### Orpheus TTS (canopyai/Orpheus-TTS)
33
+
34
+ The breakthrough model of 2025. Built on a Llama-3b backbone, Orpheus treats speech synthesis as a language modeling task — predicting speech tokens the same way an LLM predicts text tokens. This architectural choice gives it natural prosody and emotional range that rivals ElevenLabs.
35
+
36
+ - **Architecture:** LLM-backbone (Llama-3b fine-tuned on speech tokens)
37
+ - **Model sizes:** 3B (best quality), 1B (balanced), 400M (fast), 150M (edge/mobile)
38
+ - **Streaming latency:** ~200ms first-chunk with 3B model
39
+ - **Voice cloning:** Zero-shot (provide a reference clip)
40
+ - **Emotion:** Natural emotional speech from text context (no explicit slider)
41
+ - **Languages:** English primary, community multilingual fine-tunes emerging
42
+ - **License:** MIT — fully open, commercial use allowed
43
+ - **VRAM:** 3B needs ~8GB, 1B needs ~4GB, 400M needs ~2GB
44
+ - **Best for:** Highest-quality local TTS, privacy-sensitive applications, production narration
45
+
46
+ #### Chatterbox (resemble-ai/chatterbox)
47
+
48
+ The first open-source model with a dedicated emotion exaggeration parameter. One slider takes output from monotone newsreader to dramatic podcast host. It was leading Hugging Face's trending models in late 2025.
49
+
50
+ - **Architecture:** Custom 350M parameter model with emotion conditioning
51
+ - **Model size:** 350M params (~1.5GB VRAM)
52
+ - **Streaming latency:** ~300ms first-chunk
53
+ - **Voice cloning:** 5-second reference clip (zero-shot)
54
+ - **Emotion:** Explicit exaggeration slider (0.0 = flat monotone, 1.0 = maximum drama)
55
+ - **Languages:** 23 languages supported
56
+ - **License:** MIT — fully open, commercial use allowed
57
+ - **Watermarking:** Built-in audio watermarking (can be disabled)
58
+ - **Best for:** Emotion-controlled narration, multilingual content, podcast personality
59
+
60
+ #### Bark (suno-ai/bark)
61
+
62
+ Unique among TTS models — Bark generates not just speech but non-verbal audio: laughter, sighing, throat-clearing, background ambience, and even simple music. Less controllable than purpose-built TTS but uniquely expressive for creative content.
63
+
64
+ - **Architecture:** Transformer-based, multi-codebook audio generation
65
+ - **Streaming:** Not natively streaming (generates full clips)
66
+ - **Voice cloning:** Speaker prompts (less precise than Orpheus/Chatterbox)
67
+ - **Emotion:** Implicit via text prompts ([laughs], [sighs], [clears throat])
68
+ - **Languages:** 13+ languages
69
+ - **License:** MIT — fully open, commercial use allowed
70
+ - **VRAM:** ~6GB for full model
71
+ - **Best for:** Creative audio with sound effects, expressive storytelling, demo/prototype content
72
+
73
+ #### Coqui TTS / XTTS-v2
74
+
75
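+ The Coqui TTS toolkit offers the widest language coverage of the options listed here — over 1,100 languages across its model catalog. Its flagship XTTS-v2 model supports 17 languages well and delivers high-quality multilingual speech with just a 6-second voice cloning reference.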
76
+
77
+ - **Architecture:** GPT-style autoregressive + VQ-VAE
78
+ - **Model size:** ~1.6B params
79
+ - **Voice cloning:** 6-second reference clip (zero-shot)
80
+ - **Languages:** 1,100+ languages (XTTS-v2 supports 17 well, Coqui TTS covers 1,100+)
81
+ - **License:** Coqui Public Model License (CPML) — **non-commercial** for XTTS-v2 model weights. The code is MPL-2.0.
82
+ - **VRAM:** ~4-6GB
83
+ - **Best for:** Multilingual/minority language content, research, non-commercial projects
84
+
85
+ > **License warning:** XTTS-v2 model weights are under CPML (non-commercial). For commercial use, train your own model with the Coqui TTS framework or use a different provider.
86
+
87
+ #### Parler TTS (HuggingFace)
88
+
89
+ The best developer experience of the open-source options. Clean Python API, solid documentation, and straightforward integration. Describe the voice you want in natural language ("a warm female voice with a slight British accent").
90
+
91
+ - **Architecture:** Encoder-decoder with text-described voice conditioning
92
+ - **Voice control:** Natural language voice description (no reference clip needed)
93
+ - **Languages:** English primary, multilingual variants available
94
+ - **License:** Apache 2.0 — fully open, commercial use allowed
95
+ - **VRAM:** ~4GB
96
+ - **Best for:** Quick prototyping, simple integrations, developers who want clean DX
97
+
98
+ ---
99
+
100
+ ### Cloud API Providers
101
+
102
+ #### ElevenLabs
103
+
104
+ The industry benchmark for TTS quality. Consistently rated highest in blind listening tests. Offers instant voice cloning, a growing library of pre-made voices, and true real-time streaming.
105
+
106
+ - **Quality:** Best-in-class (reference standard)
107
+ - **Free tier:** 10,000 credits/month (~10 minutes of audio)
108
+ - **Pricing:** Starter $5/mo (30k credits), Creator $22/mo (100k credits), Pro $99/mo (500k credits)
109
+ - **Per-character cost:** $0.12-$0.30 per 1,000 characters (varies by tier)
110
+ - **Voice cloning:** Instant (30s clip) or Professional (30+ minutes for custom model)
111
+ - **Streaming:** Yes, WebSocket-based real-time streaming
112
+ - **Languages:** 29+ languages
113
+ - **Emotion:** Automatic from text context + style controls
114
+ - **Best for:** Production-quality content, commercial podcasts, highest fidelity requirements
115
+
116
+ #### Google Cloud TTS
117
+
118
+ Reliable, well-documented, and cost-effective for high volume. WaveNet and Neural2 voices sound natural. Strong SSML support for fine-grained control over pronunciation, pauses, and emphasis.
119
+
120
+ - **Quality:** Very good (WaveNet/Neural2), good (Standard)
121
+ - **Pricing:** Standard: $4/1M chars, WaveNet: $16/1M chars, Neural2: $16/1M chars
122
+ - **Free tier:** 1M Standard chars/mo, 1M WaveNet chars/mo (first 90 days), then Standard only
123
+ - **SSML:** Full SSML 1.0 support with custom extensions
124
+ - **Streaming:** gRPC streaming supported
125
+ - **Languages:** 50+ languages, 220+ voices
126
+ - **Voice cloning:** Custom Voice (enterprise only, requires 2+ hours of training data)
127
+ - **Best for:** High-volume production, GCP-native apps, SSML-heavy workflows
128
+
129
+ #### Azure AI Speech
130
+
131
+ Microsoft's offering stands out for its per-word timestamp feature — essential for subtitle generation, karaoke-style highlighting, and precise audio-text alignment. The HD V2 tier adds context-aware emotion that reads surrounding sentences to modulate delivery.
132
+
133
+ - **Quality:** Excellent (Neural), Best-in-class for context-aware emotion (HD V2)
134
+ - **Pricing:** Neural: $15-16/1M chars, HD V2: $30/1M chars
135
+ - **Free tier:** 500K chars/mo (Neural)
136
+ - **Unique feature:** Per-word timestamps (viseme + word boundary events)
137
+ - **Streaming:** Real-time streaming via WebSocket
138
+ - **Languages:** 60+ languages, 400+ voices
139
+ - **Voice cloning:** Personal Voice (requires consent + training data)
140
+ - **Emotion:** HD V2 reads context to auto-select emotion, plus explicit SSML styles
141
+ - **Best for:** Subtitle generation, word-level sync, Azure-native apps, context-aware emotion
142
+
143
+ ---
144
+
145
+ ### Full Comparison Table
146
+
147
+ | Provider | Quality | Cost | Latency | Languages | Voice Cloning | Emotion Control | License | Best For |
148
+ |----------|---------|------|---------|-----------|---------------|-----------------|---------|----------|
149
+ | **Orpheus TTS** | Excellent | Free (GPU cost) | ~200ms stream | English+ | Zero-shot | From context | MIT | Best local quality |
150
+ | **Chatterbox** | Very Good | Free (GPU cost) | ~300ms stream | 23 | 5-sec clip | Slider (0-1) | MIT | Emotion control |
151
+ | **Bark** | Good | Free (GPU cost) | ~2-5s full clip | 13+ | Speaker prompts | Text tags | MIT | Non-speech sounds |
152
+ | **Coqui XTTS-v2** | Very Good | Free (GPU cost) | ~500ms | 17 (XTTS-v2); 1,100+ across Coqui TTS | 6-sec clip | Limited | CPML (non-commercial) | Most languages |
153
+ | **Parler TTS** | Good | Free (GPU cost) | ~400ms | English+ | NL description | NL description | Apache 2.0 | Best DX |
154
+ | **ElevenLabs** | Best | $0.12-0.30/1k chars | ~150ms stream | 29+ | Instant (30s) | Auto + styles | Proprietary | Production quality |
155
+ | **Google Cloud** | Very Good | $4-16/1M chars | ~200ms gRPC | 50+ | Enterprise only | SSML | Proprietary | High volume, SSML |
156
+ | **Azure Speech** | Excellent | $15-30/1M chars | ~200ms WS | 60+ | Personal Voice | HD V2 context | Proprietary | Word timestamps |
157
+
158
+ ---
159
+
160
+ ## 2. Decision Tree
161
+
162
+ Use this flowchart to select your provider:
163
+
164
+ ```
165
+ START: What's your primary constraint?
166
+ |
167
+ +-- Budget = $0 (must be free)
168
+ | |
169
+ | +-- Need best quality? ---------> Orpheus TTS (3B model)
170
+ | +-- Need emotion control? ------> Chatterbox (emotion slider)
171
+ | +-- Need sound effects too? ----> Bark
172
+ | +-- Need 100+ languages? -------> Coqui TTS (check CPML license)
173
+ | +-- Need fastest setup? --------> Parler TTS
174
+ |
175
+ +-- Budget available (cloud OK)
176
+ | |
177
+ | +-- Need absolute best quality? -----> ElevenLabs
178
+ | +-- Need per-word timestamps? -------> Azure HD V2
179
+ | +-- Need cheapest per-character? ----> Google Cloud TTS (Standard)
180
+ | +-- Need context-aware emotion? -----> Azure HD V2
181
+ | +-- Already on GCP? -----------------> Google Cloud TTS
182
+ | +-- Already on Azure? ---------------> Azure AI Speech
183
+ |
184
+ +-- Privacy/compliance (no cloud)
185
+ | |
186
+ | +-- Commercial use? -----> Orpheus (MIT) or Chatterbox (MIT)
187
+ | +-- Research/non-profit? -> Coqui XTTS-v2 (best multilingual)
188
+ |
189
+ +-- Edge/mobile deployment
190
+ |
191
+ +-- Orpheus 150M or 400M (smallest footprint)
192
+ ```
193
+
194
+ ### Quick Decision Summary
195
+
196
+ | Scenario | Recommendation |
197
+ |----------|---------------|
198
+ | Best quality, budget available | ElevenLabs |
199
+ | Best open-source quality | Orpheus TTS (3B) |
200
+ | Need emotion control slider | Chatterbox |
201
+ | Non-speech sounds (laughter, sighs) | Bark |
202
+ | 100% local/private, commercial | Orpheus or Chatterbox (MIT) |
203
+ | Cheapest cloud API | Google Cloud TTS Standard ($4/1M chars) |
204
+ | Per-word timestamps for subtitles | Azure HD V2 |
205
+ | Most language coverage | Coqui TTS (1,100+) |
206
+ | Fastest prototype / best DX | Parler TTS |
207
+ | Edge/mobile deployment | Orpheus 150M-400M |
208
+
209
+ ---
210
+
211
+ ## 3. TypeScript Integration Examples
212
+
213
+ ### 3a. ElevenLabs (REST API with Streaming)
214
+
215
+ ```typescript
216
+ import { Readable } from 'stream';
217
+ import { writeFile } from 'fs/promises';
218
+
219
+ interface ElevenLabsConfig {
220
+ apiKey: string; // Sent as the 'xi-api-key' request header
221
+ voiceId: string; // Placed in the /v1/text-to-speech/{voiceId}/stream URL path
222
+ modelId?: string; // 'eleven_turbo_v2_5' for speed, 'eleven_multilingual_v2' for quality
223
+ stability?: number; // 0.0-1.0 (lower = more expressive)
224
+ similarityBoost?: number; // 0.0-1.0 (higher = closer to original voice)
225
+ }
226
+
227
/**
 * Synthesize `text` with the ElevenLabs text-to-speech endpoint and return the
 * complete audio as a single Buffer.
 *
 * The whole response is buffered in memory before this function returns; use
 * `streamSpeechToResponse` when audio must be forwarded chunk by chunk.
 *
 * @param text - Text to synthesize.
 * @param config - API key, voice ID and optional model / voice settings.
 *   Defaults: model 'eleven_multilingual_v2', stability 0.5, similarityBoost 0.75.
 * @returns Buffer with the audio bytes sent by the API (the original snippet
 *   documents these as MP3 data).
 * @throws Error when the API answers with a non-2xx status (the message carries
 *   the status code and response body) or when the response has no body.
 */
async function generateSpeech(
  text: string,
  config: ElevenLabsConfig
): Promise<Buffer> {
  const {
    apiKey,
    voiceId,
    modelId = 'eleven_multilingual_v2',
    stability = 0.5,
    similarityBoost = 0.75,
  } = config;

  // voiceId becomes a URL path segment, so encode it instead of trusting its shape.
  const url = `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}/stream`;

  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'xi-api-key': apiKey,
    },
    body: JSON.stringify({
      text,
      model_id: modelId,
      voice_settings: {
        stability,
        similarity_boost: similarityBoost,
      },
    }),
  });

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`ElevenLabs API error ${response.status}: ${error}`);
  }

  if (!response.body) throw new Error('No response body');

  // arrayBuffer() drains the stream for us; no manual reader loop needed.
  return Buffer.from(await response.arrayBuffer());
}
280
+
281
/**
 * Stream ElevenLabs audio directly to a writable stream (e.g., an HTTP response).
 * Useful for real-time playback without buffering the entire file.
 *
 * Chunks are forwarded as they arrive and `output.end()` is called once the
 * upstream response is exhausted.
 *
 * @param text - Text to synthesize.
 * @param config - API key, voice ID and optional model / voice settings.
 *   Defaults: model 'eleven_turbo_v2_5', stability 0.5, similarityBoost 0.75.
 * @param output - Destination stream; it is ended on success.
 * @throws Error on a non-2xx API status, a missing response body, or when
 *   `output` errors or closes while a write is pending.
 */
async function streamSpeechToResponse(
  text: string,
  config: ElevenLabsConfig,
  output: NodeJS.WritableStream
): Promise<void> {
  const {
    apiKey,
    voiceId,
    modelId = 'eleven_turbo_v2_5',
    stability = 0.5,
    similarityBoost = 0.75,
  } = config;

  // optimize_streaming_latency (0-4, higher = lower latency but slight quality
  // trade-off) is sent as a URL query parameter rather than a JSON body field.
  const url =
    `https://api.elevenlabs.io/v1/text-to-speech/${encodeURIComponent(voiceId)}/stream` +
    '?optimize_streaming_latency=3';

  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'xi-api-key': apiKey,
    },
    body: JSON.stringify({
      text,
      model_id: modelId,
      // Honor the caller's voice settings instead of hard-coding them.
      voice_settings: { stability, similarity_boost: similarityBoost },
    }),
  });

  if (!response.ok) {
    const detail = await response.text();
    throw new Error(
      `ElevenLabs stream error: ${response.status}${detail ? ` ${detail}` : ''}`
    );
  }

  const reader = response.body?.getReader();
  if (!reader) throw new Error('No response body');

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      // write() returns false when the destination's buffer is full; pause
      // reading until it drains so a slow consumer cannot balloon memory.
      if (!output.write(value)) {
        await waitForDrain(output);
      }
    }
  } catch (err) {
    // Stop the upstream download if we cannot deliver the audio.
    await reader.cancel().catch(() => undefined);
    throw err;
  }

  output.end();
}

/** Resolve on the next 'drain' of `stream`; reject if it errors or closes first. */
function waitForDrain(stream: NodeJS.WritableStream): Promise<void> {
  return new Promise<void>((resolve, reject) => {
    const cleanup = () => {
      stream.removeListener('drain', onDrain);
      stream.removeListener('error', onError);
      stream.removeListener('close', onClose);
    };
    const onDrain = () => {
      cleanup();
      resolve();
    };
    const onError = (err: Error) => {
      cleanup();
      reject(err);
    };
    const onClose = () => {
      cleanup();
      reject(new Error('Output stream closed before audio finished'));
    };
    stream.once('drain', onDrain);
    stream.once('error', onError);
    stream.once('close', onClose);
  });
}
325
+
326
+ // --- Usage ---
327
/**
 * Example entry point: synthesizes one sentence with ElevenLabs via
 * `generateSpeech` and saves the result as `output.mp3` in the working directory.
 *
 * @throws Error when the ELEVENLABS_API_KEY environment variable is not set.
 */
async function main() {
  // Fail fast with a clear message instead of sending an undefined key and
  // surfacing an opaque API error later.
  const apiKey = process.env.ELEVENLABS_API_KEY;
  if (!apiKey) {
    throw new Error('ELEVENLABS_API_KEY environment variable is not set');
  }

  const config: ElevenLabsConfig = {
    apiKey,
    voiceId: 'pNInz6obpgDQGcFmaJgB', // "Adam" pre-made voice
    modelId: 'eleven_multilingual_v2',
    stability: 0.4, // More expressive
    similarityBoost: 0.8,
  };

  const audioBuffer = await generateSpeech(
    'Welcome to our podcast. Today we explore the intersection of faith and technology.',
    config
  );

  await writeFile('output.mp3', audioBuffer);
  console.log(`Generated ${audioBuffer.length} bytes of audio`);
}
344
+ ```
345
+
346
+ ### 3b. Orpheus TTS (Local via Python Subprocess)
347
+
348
+ Orpheus runs as a Python process. From Node.js/TypeScript, call it via subprocess. This keeps your main app in TypeScript while leveraging the Python ML ecosystem.
349
+
350
+ ```typescript
351
+ import { spawn } from 'child_process';
352
+ import { writeFile, readFile, unlink } from 'fs/promises';
353
+ import { randomUUID } from 'crypto';
354
+ import path from 'path';
355
+
356
+ type OrpheusModelSize = '3B' | '1B' | '400M' | '150M';
357
+
358
+ interface OrpheusConfig {
359
+ pythonPath?: string; // Path to Python with orpheus installed (generators fall back to 'python')
360
+ modelsDir?: string; // Where Orpheus models are stored. NOTE(review): neither generator in this section reads this field; confirm whether it should be wired through
361
+ outputDir?: string; // Temp directory for audio files (generators fall back to '/tmp/orpheus-output')
362
+ }
363
+
364
/**
 * Generate speech using local Orpheus TTS via a Python subprocess.
 * Requires: pip install orpheus-tts torch
 * (NOTE(review): package name taken from the original snippet; verify it
 * against the Orpheus README before publishing.)
 *
 * The Python source is a constant string. Every variable value reaches the
 * interpreter as data: the text on stdin, the model name and output path as
 * argv. Nothing caller-controlled is spliced into Python code.
 *
 * @param text - The text to synthesize
 * @param modelSize - Model variant: '3B' (best), '1B', '400M', '150M' (fastest)
 * @param config - Path configuration (`pythonPath`, `outputDir`)
 * @returns Buffer containing the bytes of the WAV file written by the script
 * @throws Error when Python cannot be spawned, exits non-zero, is killed
 *   (e.g. by the 2-minute timeout), or produces no readable output file
 */
async function generateLocalTTS(
  text: string,
  modelSize: OrpheusModelSize = '3B',
  config: OrpheusConfig = {}
): Promise<Buffer> {
  const {
    pythonPath = 'python',
    outputDir = '/tmp/orpheus-output',
  } = config;

  const outputFile = path.join(outputDir, `orpheus-${randomUUID()}.wav`);

  // Model name mapping.
  // NOTE(review): confirm these repository IDs exist for every size listed.
  const modelMap: Record<OrpheusModelSize, string> = {
    '3B': 'canopyai/Orpheus-TTS-0.1-3B',
    '1B': 'canopyai/Orpheus-TTS-0.1-1B',
    '400M': 'canopyai/Orpheus-TTS-0.1-400M',
    '150M': 'canopyai/Orpheus-TTS-0.1-150M',
  };

  // argv[1] = model name, argv[2] = output path, stdin = text.
  // The script creates the output directory so the default outputDir works
  // on a fresh machine.
  // NOTE(review): confirm that generate_speech() returns an object with
  // .export() in the installed orpheus_tts version.
  const pythonScript = `
import os
import sys
import torch
from orpheus_tts import OrpheusModel

model_name, output_path = sys.argv[1], sys.argv[2]
text = sys.stdin.read()
os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
model = OrpheusModel(model_name=model_name)
audio = model.generate_speech(
    prompt=text,
    voice="tara",  # Default voice; options: tara, leah, jess, leo, dan, mia, zac
)
audio.export(output_path, format="wav")
print("OK")
`;

  return new Promise<Buffer>((resolve, reject) => {
    const proc = spawn(
      pythonPath,
      ['-c', pythonScript, modelMap[modelSize], outputFile],
      { timeout: 120_000 } // 2 minute timeout for large texts
    );

    let stderr = '';
    proc.stderr.on('data', (data) => { stderr += data.toString(); });

    // If the interpreter dies before reading stdin the write fails with EPIPE.
    // Without a listener that 'error' event would crash the host process; the
    // real failure is reported through 'close' / 'error' below.
    proc.stdin.on('error', () => undefined);
    proc.stdin.end(text);

    // Reads the result and always removes the temp file before settling.
    const finish = async (
      code: number | null,
      signal: NodeJS.Signals | null
    ): Promise<Buffer> => {
      try {
        if (code !== 0) {
          // code is null when the child was killed (e.g. by the timeout).
          const status = code === null ? `killed by ${signal}` : `exit ${code}`;
          throw new Error(`Orpheus TTS failed (${status}): ${stderr}`);
        }
        try {
          return await readFile(outputFile);
        } catch (err) {
          throw new Error(`Failed to read Orpheus output: ${String(err)}`);
        }
      } finally {
        // Best-effort cleanup; the file may not exist on failure.
        await unlink(outputFile).catch(() => undefined);
      }
    };

    proc.on('close', (code, signal) => {
      finish(code, signal).then(resolve, reject);
    });

    proc.on('error', (err) => {
      reject(new Error(`Failed to spawn Python: ${err.message}`));
    });
  });
}
440
+
441
/**
 * Generate speech with a cloned voice using a reference audio clip.
 *
 * Runs Orpheus in a Python subprocess. The Python source is a constant string:
 * the text is sent on stdin, and the model name, output path and reference clip
 * path are passed as argv, so a caller-supplied path cannot alter the script.
 *
 * @param text - The text to synthesize
 * @param referenceAudioPath - Path of the audio clip whose voice should be cloned
 * @param modelSize - Model variant: '3B' (default), '1B', '400M', '150M'
 * @param config - Path configuration (`pythonPath`, `outputDir`)
 * @returns Buffer containing the bytes of the WAV file written by the script
 * @throws Error when Python cannot be spawned, exits non-zero, is killed
 *   (e.g. by the 2-minute timeout), or produces no readable output file
 */
async function generateClonedVoiceTTS(
  text: string,
  referenceAudioPath: string,
  modelSize: OrpheusModelSize = '3B',
  config: OrpheusConfig = {}
): Promise<Buffer> {
  const { pythonPath = 'python', outputDir = '/tmp/orpheus-output' } = config;
  const outputFile = path.join(outputDir, `orpheus-clone-${randomUUID()}.wav`);

  // NOTE(review): confirm these repository IDs exist for every size listed.
  const modelMap: Record<OrpheusModelSize, string> = {
    '3B': 'canopyai/Orpheus-TTS-0.1-3B',
    '1B': 'canopyai/Orpheus-TTS-0.1-1B',
    '400M': 'canopyai/Orpheus-TTS-0.1-400M',
    '150M': 'canopyai/Orpheus-TTS-0.1-150M',
  };

  // argv[1] = model name, argv[2] = output path, argv[3] = reference clip.
  // NOTE(review): the reference_audio keyword and .export() come from the
  // original snippet; confirm the installed orpheus_tts version supports them.
  const pythonScript = `
import os
import sys, torch
from orpheus_tts import OrpheusModel

model_name, output_path, reference_path = sys.argv[1], sys.argv[2], sys.argv[3]
text = sys.stdin.read()
os.makedirs(os.path.dirname(output_path) or ".", exist_ok=True)
model = OrpheusModel(model_name=model_name)
audio = model.generate_speech(
    prompt=text,
    reference_audio=reference_path,
)
audio.export(output_path, format="wav")
print("OK")
`;

  return new Promise<Buffer>((resolve, reject) => {
    const proc = spawn(
      pythonPath,
      ['-c', pythonScript, modelMap[modelSize], outputFile, referenceAudioPath],
      { timeout: 120_000 }
    );

    let stderr = '';
    proc.stderr.on('data', (data) => { stderr += data.toString(); });

    // An early interpreter exit makes the stdin write fail with EPIPE; swallow
    // that event here because 'close' / 'error' report the real failure.
    proc.stdin.on('error', () => undefined);
    proc.stdin.end(text);

    // Reads the result and always removes the temp file before settling.
    const finish = async (
      code: number | null,
      signal: NodeJS.Signals | null
    ): Promise<Buffer> => {
      try {
        if (code !== 0) {
          // code is null when the child was killed (e.g. by the timeout).
          const status = code === null ? `killed by ${signal}` : `exit ${code}`;
          throw new Error(`Orpheus clone TTS failed (${status}): ${stderr}`);
        }
        try {
          return await readFile(outputFile);
        } catch (err) {
          throw new Error(`Failed to read output: ${String(err)}`);
        }
      } finally {
        // Best-effort cleanup; the file may not exist on failure.
        await unlink(outputFile).catch(() => undefined);
      }
    };

    proc.on('close', (code, signal) => {
      finish(code, signal).then(resolve, reject);
    });

    proc.on('error', (err) => reject(new Error(`Spawn failed: ${err.message}`)));
  });
}
498
+
499
+ // --- Usage: synthesize two clips with generateLocalTTS, a quick draft and a full-quality render ---
500
+ async function main() {
501
+ // Draft quality (fast) — use 400M for iteration; no config is passed, so the default paths apply
502
+ const draftAudio = await generateLocalTTS(
503
+ 'Testing the Orpheus text-to-speech engine.',
504
+ '400M'
505
+ );
506
+ await writeFile('draft.wav', draftAudio);
507
+
508
+ // Final quality — use 3B for production; the result is saved as final.wav
509
+ const finalAudio = await generateLocalTTS(
510
+ 'Welcome to our podcast on faith and technology.',
511
+ '3B'
512
+ );
513
+ await writeFile('final.wav', finalAudio);
514
+
515
+ console.log(`Draft: ${draftAudio.length} bytes, Final: ${finalAudio.length} bytes`);
516
+ }
517
+ ```
518
+
519
+ ### 3c. Google Cloud TTS (with SSML)
520
+
521
+ ```typescript
522
+ import { TextToSpeechClient, protos } from '@google-cloud/text-to-speech';
523
+ import { writeFile } from 'fs/promises';
524
+
525
+ type AudioEncoding = protos.google.cloud.texttospeech.v1.AudioEncoding;
526
+
527
+ interface GoogleTTSConfig {
528
+ languageCode?: string; // googleTTS falls back to 'en-US' when omitted
529
+ voiceName?: string; // e.g., 'en-US-Neural2-D' (male), 'en-US-Neural2-F' (female)
530
+ audioEncoding?: 'MP3' | 'LINEAR16' | 'OGG_OPUS'; // Output encoding; googleTTS falls back to 'MP3'
531
+ speakingRate?: number; // 0.25 to 4.0 (1.0 = normal)
532
+ pitch?: number; // -20.0 to 20.0 semitones
533
+ }
534
+
535
/**
 * Generate speech from SSML using Google Cloud TTS.
 * Requires: npm install @google-cloud/text-to-speech
 * Auth: GOOGLE_APPLICATION_CREDENTIALS env var pointing to service account JSON
 *
 * @param ssml - SSML document to synthesize; it is sent as `input.ssml`, so it
 *   must already be wrapped in a `<speak>` element.
 * @param config - Optional overrides. Defaults: languageCode 'en-US',
 *   voiceName 'en-US-Neural2-D', audioEncoding 'MP3', speakingRate 1.0, pitch 0.
 * @returns Buffer with the encoded audio returned by the API
 * @throws Error when the response carries no audio content
 */
async function googleTTS(
  ssml: string,
  config: GoogleTTSConfig = {}
): Promise<Buffer> {
  const {
    languageCode = 'en-US',
    voiceName = 'en-US-Neural2-D',
    audioEncoding = 'MP3',
    speakingRate = 1.0,
    pitch = 0,
  } = config;

  const client = new TextToSpeechClient();

  try {
    const [response] = await client.synthesizeSpeech({
      input: { ssml },
      voice: {
        languageCode,
        name: voiceName,
      },
      audioConfig: {
        // Look the enum value up by name instead of hand-copying numbers
        // behind an `as` cast.
        audioEncoding: protos.google.cloud.texttospeech.v1.AudioEncoding[audioEncoding],
        speakingRate,
        pitch,
        // Audio device profile: tunes the output for headphone playback.
        // This is NOT loudness normalization; normalize in post-processing if
        // consistent levels are required.
        effectsProfileId: ['headphone-class-device'],
      },
    });

    const { audioContent } = response;
    if (!audioContent) {
      throw new Error('No audio content in Google TTS response');
    }

    // The field is typed as bytes-or-string; a string payload is base64, so
    // decode it rather than treating it as UTF-8 text.
    return typeof audioContent === 'string'
      ? Buffer.from(audioContent, 'base64')
      : Buffer.from(audioContent);
  } finally {
    // A client is created per call, so release its connection when done.
    await client.close();
  }
}
581
+
582
/** Escape characters that have special meaning in XML text and attribute values. */
function escapeSsml(value: string): string {
  return value
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;');
}

/**
 * Build SSML markup from structured content.
 * SSML gives fine-grained control over pauses, emphasis, and pronunciation.
 *
 * Each segment becomes one line inside a single `<speak>` element. Segment text
 * and attribute values are XML-escaped, so characters such as `&` or `<` in the
 * script cannot produce malformed SSML. `text` is therefore treated as plain
 * text, not as inline markup. A missing `text` renders as an empty string, and
 * a `say-as` segment without `interpretAs` falls back to plain text.
 *
 * @param segments - Ordered segments to render.
 * @returns The complete SSML document as a string.
 */
function buildSSML(segments: SSMLSegment[]): string {
  const inner = segments
    .map((seg) => {
      const text = escapeSsml(seg.text ?? '');
      switch (seg.type) {
        case 'pause':
          return `<break time="${escapeSsml(seg.duration || '500ms')}"/>`;
        case 'emphasis':
          return `<emphasis level="${escapeSsml(seg.level || 'moderate')}">${text}</emphasis>`;
        case 'prosody':
          return `<prosody rate="${escapeSsml(seg.rate || 'medium')}" pitch="${escapeSsml(seg.pitch || 'medium')}">${text}</prosody>`;
        case 'say-as':
          // Without interpretAs the element would carry interpret-as="undefined".
          return seg.interpretAs
            ? `<say-as interpret-as="${escapeSsml(seg.interpretAs)}">${text}</say-as>`
            : text;
        case 'text':
        default:
          return text;
      }
    })
    .join('\n');

  return `<speak>\n${inner}\n</speak>`;
}

/** One unit of SSML content; which optional fields apply depends on `type`. */
interface SSMLSegment {
  type: 'text' | 'pause' | 'emphasis' | 'prosody' | 'say-as';
  text?: string;
  duration?: string; // For pause: '250ms', '1s', '2s'
  level?: 'reduced' | 'moderate' | 'strong'; // For emphasis
  rate?: 'x-slow' | 'slow' | 'medium' | 'fast' | 'x-fast'; // For prosody
  pitch?: 'x-low' | 'low' | 'medium' | 'high' | 'x-high'; // For prosody
  interpretAs?: 'date' | 'time' | 'telephone' | 'cardinal' | 'ordinal' | 'spell-out'; // For say-as
}
616
+
617
+ // --- Usage ---
618
/**
 * Usage example: synthesizes one plain clip and one SSML-rich clip with
 * Google Cloud TTS, writes both to disk and logs their sizes in bytes.
 */
async function main() {
  // Plain narration: the caller supplies the <speak> wrapper directly.
  const simpleAudio = await googleTTS(
    '<speak>Welcome to our podcast on faith and technology.</speak>'
  );
  await writeFile('simple.mp3', simpleAudio);

  // Structured narration: pauses, emphasis and prosody expressed as segments.
  const introSegments: SSMLSegment[] = [
    { type: 'prosody', text: 'Welcome to Ministry Tech Weekly.', rate: 'slow', pitch: 'low' },
    { type: 'pause', duration: '750ms' },
    { type: 'text', text: 'Today we explore how churches are using' },
    { type: 'emphasis', text: 'artificial intelligence', level: 'strong' },
    { type: 'text', text: 'to reach their communities.' },
    { type: 'pause', duration: '1s' },
    { type: 'prosody', text: "Let's dive in.", rate: 'medium', pitch: 'high' },
  ];
  const ssml = buildSSML(introSegments);

  const voiceOptions = {
    voiceName: 'en-US-Neural2-D', // Deep male voice
    speakingRate: 0.95,
  };
  const richAudio = await googleTTS(ssml, voiceOptions);
  await writeFile('rich-ssml.mp3', richAudio);

  console.log(`Simple: ${simpleAudio.length}B, Rich: ${richAudio.length}B`);
}
644
+ ```
645
+
646
+ ---
647
+
648
+ ## 4. Multi-Speaker Podcast Pattern
649
+
650
+ Generate a two-speaker podcast where different TTS voices handle Host vs. Guest roles, with natural pauses and transitions composed via FFmpeg.
651
+
652
+ ### Architecture
653
+
654
+ ```
655
+ Script (JSON)
656
+ |
657
+ v
658
+ [Parse turns] --> Host voice (Voice A) --> audio_001.wav
659
+ Guest voice (Voice B) --> audio_002.wav
660
+ Host voice (Voice A) --> audio_003.wav
661
+ ...
662
+ |
663
+ v
664
+ [Generate silence segments] --> silence_500ms.wav, silence_1000ms.wav
665
+ |
666
+ v
667
+ [FFmpeg concat] --> raw_podcast.wav
668
+ |
669
+ v
670
+ [Post-processing] --> final_podcast.mp3
671
+ (See audio-enhancement-pipeline.md)
672
+ ```
673
+
674
+ ### TypeScript Implementation
675
+
676
+ ```typescript
677
+ import { execFile, spawn } from 'child_process';
678
+ import { writeFile, readFile, unlink, mkdir } from 'fs/promises';
679
+ import { promisify } from 'util';
680
+ import path from 'path';
681
+ import { randomUUID } from 'crypto';
682
+
683
+ const execFileAsync = promisify(execFile);
684
+
685
+ // --- Types ---
686
+
687
/** One spoken line of the podcast script. */
interface PodcastTurn {
  /** Which configured voice speaks this line. */
  speaker: 'host' | 'guest';
  /** The words to synthesize. */
  text: string;
  /** 0.0-1.0 for Chatterbox, ignored for others. */
  emotion?: number;
  /** Milliseconds of silence after this turn. */
  pauseAfter?: number;
}

/** Settings for rendering a whole script with `generatePodcast`. */
interface PodcastConfig {
  hostVoice: VoiceConfig;
  guestVoice: VoiceConfig;
  /** Default pause between turns (ms), used when a turn sets no `pauseAfter`. */
  defaultPause: number;
  outputPath: string;
  tempDir?: string;
}

/** Provider selection plus the provider-specific knobs for one voice. */
interface VoiceConfig {
  provider: 'elevenlabs' | 'orpheus' | 'chatterbox' | 'google';
  /** Voice ID or model path. */
  voiceId: string;

  // Provider-specific options
  apiKey?: string;
  modelSize?: '3B' | '1B' | '400M' | '150M';
  emotion?: number;
  speakingRate?: number;
}
711
+
712
+ // --- Podcast Generator ---
713
+
714
/**
 * Render a multi-speaker script to a single loudness-normalized MP3.
 *
 * Each turn is synthesized with the host or guest voice, converted to a common
 * PCM format, followed by optional silence, concatenated with FFmpeg and then
 * loudness-normalized.
 *
 * @param script Ordered turns to speak; must not be empty.
 * @param config Voices, default pause, output path and optional temp directory.
 * @returns `config.outputPath` once the MP3 has been written.
 * @throws Error if the script is empty, or if TTS generation or FFmpeg fails.
 */
async function generatePodcast(
  script: PodcastTurn[],
  config: PodcastConfig
): Promise<string> {
  if (script.length === 0) {
    throw new Error('Cannot generate a podcast from an empty script');
  }

  const tempDir = config.tempDir || `/tmp/podcast-${randomUUID()}`;
  await mkdir(tempDir, { recursive: true });

  const concatListFile = path.join(tempDir, 'concat_list.txt');
  const rawOutput = path.join(tempDir, 'raw_podcast.wav');
  // Every intermediate file is registered here so the finally block can remove
  // it even when a later step throws.
  const tempFiles: string[] = [concatListFile, rawOutput];
  const audioSegments: string[] = [];
  let segmentIndex = 0;
  const nextSegmentPath = (suffix: string): string =>
    path.join(tempDir, `seg_${String(segmentIndex).padStart(4, '0')}_${suffix}`);

  console.log(`Generating ${script.length} segments...`);

  try {
    for (const [turnIndex, turn] of script.entries()) {
      // 1. Generate speech for this turn
      const voiceConfig = turn.speaker === 'host' ? config.hostVoice : config.guestVoice;
      const providerFile = nextSegmentPath('provider.audio');
      const speechFile = nextSegmentPath('speech.wav');
      tempFiles.push(providerFile, speechFile);

      const audioBuffer = await generateTTSForProvider(turn.text, voiceConfig, turn.emotion);
      await writeFile(providerFile, audioBuffer);

      // Providers return different containers and sample rates, while the concat
      // demuxer requires identical stream parameters. Convert every segment to
      // the same format as the generated silence (44.1 kHz mono s16le WAV).
      await execFileAsync('ffmpeg', [
        '-y',
        '-i', providerFile,
        '-c:a', 'pcm_s16le',
        '-ar', '44100',
        '-ac', '1',
        speechFile,
      ]);
      audioSegments.push(speechFile);
      segmentIndex++;

      // 2. Add pause after this turn
      const pauseDuration = turn.pauseAfter ?? config.defaultPause;
      if (pauseDuration > 0) {
        const silenceFile = nextSegmentPath('silence.wav');
        tempFiles.push(silenceFile);
        await generateSilence(pauseDuration, silenceFile);
        audioSegments.push(silenceFile);
        segmentIndex++;
      }

      // Progress is counted in turns; the segment count varies with skipped pauses.
      const preview = turn.text.length > 50 ? `${turn.text.slice(0, 50)}...` : turn.text;
      console.log(`  [${turnIndex + 1}/${script.length}] ${turn.speaker}: "${preview}"`);
    }

    // 3. Concatenate all segments with FFmpeg
    // Inside a single-quoted concat entry, a literal quote is written as '\''.
    const concatContent = audioSegments
      .map((f) => `file '${f.replace(/'/g, "'\\''")}'`)
      .join('\n');
    await writeFile(concatListFile, concatContent);

    await execFileAsync('ffmpeg', [
      '-y',
      '-f', 'concat',
      '-safe', '0',
      '-i', concatListFile,
      '-c:a', 'pcm_s16le',
      '-ar', '44100',
      '-ac', '1',
      rawOutput,
    ]);

    // 4. Post-process: normalize loudness (EBU R128) and export as MP3
    await execFileAsync('ffmpeg', [
      '-y',
      '-i', rawOutput,
      '-af', [
        'loudnorm=I=-16:TP=-1.5:LRA=11', // EBU R128 broadcast standard
        'aresample=44100',               // Consistent sample rate
      ].join(','),
      '-c:a', 'libmp3lame',
      '-b:a', '192k',
      config.outputPath,
    ]);
  } finally {
    // 5. Clean up temp files (best effort; some may never have been created)
    await Promise.all(tempFiles.map((f) => unlink(f).catch(() => {})));
  }

  console.log(`Podcast saved to: ${config.outputPath}`);
  return config.outputPath;
}
788
+
789
/**
 * Route TTS generation to the configured provider.
 *
 * @param text Plain text to speak (not SSML).
 * @param voice Provider selection and provider-specific options.
 * @param emotionOverride Per-call Chatterbox emotion; falls back to
 *   `voice.emotion`, then 0.5. Ignored by the other providers.
 * @returns Encoded audio as returned by the provider.
 * @throws Error if an ElevenLabs voice has no `apiKey`, or if the provider is unknown.
 */
async function generateTTSForProvider(
  text: string,
  voice: VoiceConfig,
  emotionOverride?: number
): Promise<Buffer> {
  switch (voice.provider) {
    case 'elevenlabs': {
      // Fail with a clear message instead of sending `undefined` as the key.
      if (!voice.apiKey) {
        throw new Error('ElevenLabs voice config requires an apiKey');
      }
      return generateSpeech(text, {
        apiKey: voice.apiKey,
        voiceId: voice.voiceId,
      });
    }

    case 'orpheus':
      // NOTE(review): voice.voiceId is not forwarded here; confirm whether
      // generateLocalTTS accepts a voice name.
      return generateLocalTTS(text, voice.modelSize || '3B');

    case 'chatterbox':
      return generateChatterboxTTS(text, voice.voiceId, emotionOverride ?? voice.emotion ?? 0.5);

    case 'google': {
      // The text is wrapped in SSML, so XML-significant characters must be
      // escaped or an ampersand in the script produces an invalid document.
      const escaped = text
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;');
      return googleTTS(`<speak>${escaped}</speak>`, {
        voiceName: voice.voiceId,
        ...(voice.speakingRate !== undefined ? { speakingRate: voice.speakingRate } : {}),
      });
    }

    default: {
      // Compile-time exhaustiveness check; still throws for untyped callers.
      const unknownProvider: never = voice.provider;
      throw new Error(`Unknown TTS provider: ${String(unknownProvider)}`);
    }
  }
}
817
+
818
/**
 * Generate silence of a given duration using FFmpeg.
 *
 * The file is written as 44.1 kHz mono 16-bit PCM.
 *
 * @param durationMs Length of the silence in milliseconds; must be positive and finite.
 * @param outputPath Destination file; overwritten if it exists.
 * @throws RangeError if `durationMs` is not a positive finite number.
 */
async function generateSilence(durationMs: number, outputPath: string): Promise<void> {
  if (!Number.isFinite(durationMs) || durationMs <= 0) {
    throw new RangeError(
      `Silence duration must be a positive number of milliseconds, got ${durationMs}`
    );
  }

  // Fixed-point formatting: String() can produce exponent notation for very
  // small values, which is not a reliable duration argument.
  const durationSec = (durationMs / 1000).toFixed(3);
  await execFileAsync('ffmpeg', [
    '-y',
    '-f', 'lavfi',
    '-i', `anullsrc=r=44100:cl=mono`,
    '-t', durationSec,
    '-c:a', 'pcm_s16le',
    outputPath,
  ]);
}
832
+
833
/**
 * Generate speech with Chatterbox (emotion slider).
 *
 * Runs a short Python script in a child process. The text is sent on stdin;
 * the reference path, emotion value and output path are passed as
 * command-line arguments. Nothing caller-supplied is interpolated into the
 * Python source, so quotes or backslashes in a path cannot break or inject code.
 *
 * @param text Text to speak.
 * @param referenceAudioPath Path to the voice-clone reference clip.
 * @param emotion Chatterbox `exaggeration` value.
 * @returns The generated WAV file contents.
 * @throws RangeError if `emotion` is not a finite number.
 * @throws Error if the Python process cannot start or exits unsuccessfully.
 */
async function generateChatterboxTTS(
  text: string,
  referenceAudioPath: string,
  emotion: number
): Promise<Buffer> {
  if (!Number.isFinite(emotion)) {
    throw new RangeError(`Chatterbox emotion must be a finite number, got ${emotion}`);
  }

  const outputFile = `/tmp/chatterbox-${randomUUID()}.wav`;

  // With `python -c <script> a b c`, sys.argv is ['-c', 'a', 'b', 'c'].
  const pythonScript = `
import sys, torch, torchaudio
from chatterbox.tts import ChatterboxTTS

reference_path = sys.argv[1]
exaggeration = float(sys.argv[2])
output_path = sys.argv[3]

text = sys.stdin.read()
model = ChatterboxTTS.from_pretrained(device="cuda" if torch.cuda.is_available() else "cpu")
audio = model.generate(
    text=text,
    audio_prompt_path=reference_path,
    exaggeration=exaggeration,
)
torchaudio.save(output_path, audio, model.sr)
print("OK")
`;

  try {
    await new Promise<void>((resolve, reject) => {
      const proc = spawn(
        'python',
        ['-c', pythonScript, referenceAudioPath, String(emotion), outputFile],
        { timeout: 120_000 }
      );

      let stderr = '';
      proc.stderr.on('data', (d: Buffer) => { stderr += d.toString(); });
      // If Python exits before reading stdin the write fails with EPIPE; the
      // 'close' handler below reports the real failure.
      proc.stdin.on('error', () => {});
      proc.on('error', (e: Error) => reject(e));
      // `code` is null when the process is killed by a signal (e.g. the timeout).
      proc.on('close', (code: number | null) => {
        if (code === 0) {
          resolve();
        } else {
          reject(new Error(`Chatterbox failed: ${stderr}`));
        }
      });

      proc.stdin.end(text);
    });

    return await readFile(outputFile);
  } finally {
    // Remove the temp file on success and failure alike.
    await unlink(outputFile).catch(() => {});
  }
}
876
+
877
+ // --- Example Podcast Script ---
878
+
879
/**
 * Example: renders a six-turn host/guest conversation, using Orpheus for the
 * host and Chatterbox for the guest.
 */
async function main() {
  const hostVoice: VoiceConfig = {
    provider: 'orpheus',
    voiceId: 'leo', // Deep male voice
    modelSize: '3B',
  };
  const guestVoice: VoiceConfig = {
    provider: 'chatterbox',
    voiceId: '/path/to/guest-reference-5sec.wav',
    emotion: 0.5,
  };

  const script: PodcastTurn[] = [
    {
      speaker: 'host',
      text: 'Welcome back to Ministry Tech Weekly. I am your host, and today we have an incredible guest joining us.',
      pauseAfter: 800,
    },
    {
      speaker: 'host',
      text: 'We are going to talk about how small churches can leverage AI tools without breaking the budget.',
      pauseAfter: 1200,
    },
    {
      speaker: 'guest',
      text: 'Thanks for having me. This is a topic close to my heart. I have been working with rural congregations for the past five years.',
      emotion: 0.6, // Warm, enthusiastic
      pauseAfter: 600,
    },
    {
      speaker: 'host',
      text: 'So let us start with the basics. What is the first AI tool you recommend to a church with zero tech budget?',
      pauseAfter: 1000,
    },
    {
      speaker: 'guest',
      text: 'Great question. I always say start with transcription. Record your sermons, transcribe them with a free tool, and now you have written content for your website, social media, and email newsletters.',
      emotion: 0.7, // Passionate
      pauseAfter: 500,
    },
    {
      speaker: 'host',
      text: 'That is such a practical starting point. One recording becomes five pieces of content.',
      pauseAfter: 1500,
    },
  ];

  const config: PodcastConfig = {
    hostVoice,
    guestVoice,
    defaultPause: 700,
    outputPath: './ministry-tech-weekly-ep1.mp3',
  };
  await generatePodcast(script, config);
}
930
+ ```
931
+
932
+ ### Key Patterns for Natural-Sounding Podcasts
933
+
934
+ 1. **Vary pause lengths:** Short (400-600ms) within a thought, medium (700-1000ms) between topics, long (1200-1500ms) for dramatic effect or topic transitions.
935
+
936
+ 2. **Match voice characteristics:** Pair voices with complementary tones — a deep, steady host with a warmer, more expressive guest.
937
+
938
+ 3. **Use Chatterbox emotion strategically:**
939
+ - `emotion: 0.3` — Calm, informational (reading statistics, facts)
940
+ - `emotion: 0.5` — Conversational (default for most dialogue)
941
+ - `emotion: 0.7` — Enthusiastic (key points, exciting reveals)
942
+ - `emotion: 0.9` — Very dramatic (use sparingly for climactic moments)
943
+
944
+ 4. **Post-process with audio-enhancement-pipeline.md:** After concatenation, run the full enhancement pipeline (noise reduction, loudness normalization, compression) for broadcast-ready output.
945
+
946
+ ---
947
+
948
+ ## 5. Quality Tips
949
+
950
+ ### Audio Normalization (Critical)
951
+
952
+ Always normalize TTS output to EBU R128 loudness standard. Different providers output at wildly different levels — mixing them without normalization creates jarring volume jumps.
953
+
954
+ ```bash
955
+ # Normalize a single file to -16 LUFS (podcast standard)
956
+ ffmpeg -i input.wav -af loudnorm=I=-16:TP=-1.5:LRA=11 -ar 44100 output.wav
957
+
958
+ # Two-pass normalization (more accurate, recommended for final output)
959
+ # Pass 1: Measure
960
+ ffmpeg -i input.wav -af loudnorm=I=-16:TP=-1.5:LRA=11:print_format=json -f null /dev/null 2>&1
961
+
962
+ # Pass 2: Apply measured values (use values from pass 1 output)
963
+ ffmpeg -i input.wav -af loudnorm=I=-16:TP=-1.5:LRA=11:measured_I=-23.5:measured_LRA=7.2:measured_TP=-3.1:measured_thresh=-34.2 output.wav
964
+ ```
965
+
966
+ ### Pacing and Pauses
967
+
968
+ - Insert 200-400ms silence between sentences for natural breathing rhythm
969
+ - Insert 600-1000ms between paragraphs or topic changes
970
+ - For dramatic effect, use 1500-2000ms pauses before key reveals
971
+ - Never go below 150ms between sentences — it sounds rushed and robotic
972
+
973
+ ### Provider-Specific Tips
974
+
975
+ **Orpheus TTS:**
976
+ - Use 3B model for all final/published output — the quality gap vs 1B is significant
977
+ - Use 400M model for drafts, previews, and rapid iteration (4x faster)
978
+ - 150M is viable for real-time chat applications where latency matters more than quality
979
+ - Pre-load the model once and reuse — cold start takes 10-15 seconds on consumer GPUs
980
+ - The model reads emotion from text context. Writing "excited" or "sadly" in the text naturally affects prosody
981
+
982
+ **Chatterbox:**
983
+ - `emotion: 0.3` — News anchors, formal narration, educational content
984
+ - `emotion: 0.5` — Default conversational tone, general-purpose
985
+ - `emotion: 0.7` — Casual podcast, storytelling, engaging narration
986
+ - `emotion: 0.9-1.0` — Use sparingly: dramatic readings, voice acting, emphasis
987
+ - The 5-second voice clone reference should be clean audio (no background noise, no music)
988
+ - Built-in watermarking is on by default — disable explicitly if not needed
989
+
990
+ **Bark:**
991
+ - Use text tags for non-speech: `[laughs]`, `[sighs]`, `[clears throat]`, `[music]`
992
+ - Generates complete clips (not streaming) — best for short segments
993
+ - Quality varies between runs — generate 2-3 takes and pick the best
994
+ - Not suitable for long-form narration (use Orpheus or ElevenLabs instead)
995
+
996
+ **ElevenLabs:**
997
+ - Lower `stability` (0.2-0.4) for more expressive, varied speech
998
+ - Higher `stability` (0.7-0.9) for consistent, professional narration
999
+ - Use `eleven_turbo_v2_5` model for speed, `eleven_multilingual_v2` for best quality
1000
+ - Voice cloning: Instant clone (30s clip) is good; Professional clone (30+ min) is excellent
1001
+ - Monitor credit usage — credits are consumed per character, not per request
1002
+
1003
+ **Google Cloud TTS:**
1004
+ - Always use Neural2 or WaveNet voices (Standard voices sound robotic)
1005
+ - SSML `<break>` tags give precise pause control: `<break time="750ms"/>`
1006
+ - Use `effectsProfileId: ['headphone-class-device']` for optimized podcast output
1007
+ - Batch text into fewer, larger requests to reduce latency — billing is per character, so batching does not change the cost, but every request adds round-trip overhead
1008
+
1009
+ **Azure AI Speech:**
1010
+ - HD V2 tier reads surrounding context to modulate emotion automatically
1011
+ - Word boundary events enable subtitle generation with per-word timestamps
1012
+ - Use SSML `<mstts:express-as>` for explicit emotion: `style="cheerful"`, `style="sad"`, `style="angry"`
1013
+
1014
+ ### Cross-Reference
1015
+
1016
+ For post-processing your TTS output, see the companion skills:
1017
+ - **audio-enhancement-pipeline.md** — Noise reduction, loudness normalization, compression, format conversion pipeline
1018
+ - **ffmpeg-command-generator.md** — FFmpeg commands for any audio/video transformation
1019
+ - **content-repurposing-pipeline.md** — Turn generated audio into social media clips, transcripts, and blog posts
1020
+
1021
+ ---
1022
+
1023
+ ## 6. Cost Calculator
1024
+
1025
+ ### Cost Per Minute of Generated Speech
1026
+
1027
+ Average speaking rate: ~150 words/minute, ~750 characters/minute.
1028
+
1029
+ | Provider | Cost per 1k chars | Cost per minute | Cost for 30-min episode |
1030
+ |----------|-------------------|-----------------|-------------------------|
1031
+ | **Orpheus (local)** | ~$0.0004 (electricity) | ~$0.0003 | ~$0.01 |
1031
+ | **Chatterbox (local)** | ~$0.0004 (electricity) | ~$0.0003 | ~$0.01 |
1032
+ | **Bark (local)** | ~$0.0004 (electricity) | ~$0.0003 | ~$0.01 |
1033
+ | **Parler TTS (local)** | ~$0.0004 (electricity) | ~$0.0003 | ~$0.01 |
1035
+ | **ElevenLabs Free** | $0.00 | $0.00 | Free (10 min/mo limit) |
1036
+ | **ElevenLabs Starter** | ~$0.17 | ~$0.13 | ~$3.75 |
1037
+ | **ElevenLabs Creator** | ~$0.22 | ~$0.17 | ~$5.00 |
1038
+ | **ElevenLabs Pro** | ~$0.20 | ~$0.15 | ~$4.50 |
1039
+ | **Google Cloud Standard** | $0.004 | ~$0.003 | ~$0.09 |
1040
+ | **Google Cloud WaveNet** | $0.016 | ~$0.012 | ~$0.36 |
1041
+ | **Google Cloud Neural2** | $0.016 | ~$0.012 | ~$0.36 |
1042
+ | **Azure Neural** | $0.016 | ~$0.012 | ~$0.36 |
1043
+ | **Azure HD V2** | $0.030 | ~$0.023 | ~$0.68 |
1044
+
1045
+ ### Local GPU Electricity Cost Estimate
1046
+
1047
+ ```
1048
+ Formula: GPU TDP (watts) * generation_time (hours) * electricity_rate ($/kWh)
1049
+
1050
+ Example: Orpheus 3B on RTX 4090 (450W TDP at ~60% utilization)
1051
+ - 30 minutes of speech ≈ 15 minutes generation time (2:1 real-time ratio)
1052
+ - Energy: 270W * 0.25 hours = 0.0675 kWh
1053
+ - Cost: 0.0675 * $0.12/kWh = $0.008
1054
+
1055
+ Example: Chatterbox on RTX 3060 (170W TDP at ~80% utilization)
1056
+ - 30 minutes of speech ≈ 25 minutes generation time
1057
+ - Energy: 136W * 0.42 hours = 0.057 kWh
1058
+ - Cost: 0.057 * $0.12/kWh = $0.007
1059
+ ```
1060
+
1061
+ ### Cost Comparison: Weekly Podcast (30 minutes)
1062
+
1063
+ | Approach | Monthly Cost | Annual Cost | Notes |
1064
+ |----------|-------------|-------------|-------|
1065
+ | **Orpheus 3B (local, RTX 4090)** | ~$0.03 | ~$0.40 | Electricity only; GPU amortization not included |
1065
+ | **Chatterbox (local, RTX 3060)** | ~$0.03 | ~$0.40 | Electricity only; GPU amortization not included |
1067
+ | **Google Cloud Neural2** | ~$1.44 | ~$17 | Most cost-effective cloud |
1068
+ | **Azure Neural** | ~$1.44 | ~$17 | Add $1.24/mo for HD V2 |
1069
+ | **ElevenLabs Starter** | $5.00 | $60 | Fixed monthly (may exceed limits) |
1070
+ | **ElevenLabs Pro** | $99.00 | $1,188 | Large monthly quota for heavy use (~500k characters at ~$0.20 per 1k; not unlimited) |
1071
+
1072
+ ### Break-Even: Local GPU vs. Cloud API
1073
+
1074
+ ```
1075
+ Break-even calculation: When does buying a GPU pay for itself?
1076
+
1077
+ RTX 4090 ($1,600) vs. ElevenLabs Pro ($99/mo):
1078
+ Break-even: $1,600 / $99 = ~16 months
1079
+ After 16 months, local is essentially free (just electricity)
1080
+
1081
+ RTX 3060 ($300) vs. Google Cloud Neural2 ($1.44/mo):
1082
+ Break-even: $300 / $1.44 = ~208 months (17+ years)
1083
+ Google Cloud is so cheap that a dedicated GPU rarely makes sense for cost alone.
1084
+ But: privacy, latency, and offline capability are valid reasons to go local.
1085
+ ```
1086
+
1087
+ ### Quick Estimation Formula
1088
+
1089
+ ```
1090
+ Characters in text ≈ words * 5
1091
+ Cost = (characters / 1000) * provider_rate_per_1k_chars
1092
+
1093
+ Example: 3,000-word blog post narration
1094
+ Characters: 3,000 * 5 = 15,000
1095
+ ElevenLabs Starter: (15,000 / 1000) * $0.17 = $2.55
1096
+ Google Neural2: (15,000 / 1000) * $0.016 = $0.24
1097
+ Azure HD V2: (15,000 / 1000) * $0.030 = $0.45
1098
+ ```
1099
+
1100
+ ---
1101
+
1102
+ ## 7. Advanced Patterns
1103
+
1104
+ ### Hybrid Approach: Draft Local, Publish Cloud
1105
+
1106
+ Use cheap/fast local models for iteration, then generate final output with the highest-quality provider.
1107
+
1108
+ ```typescript
1109
/** Pair of voices: one for quick review drafts, one for the published result. */
interface HybridTTSConfig {
  /** Fast, local — for review. */
  draftProvider: VoiceConfig;
  /** Best quality — for publishing. */
  finalProvider: VoiceConfig;
}

/**
 * Synthesize `text` with the draft voice, or with the final voice when
 * `isFinal` is true.
 */
async function hybridGenerate(
  text: string,
  config: HybridTTSConfig,
  isFinal: boolean = false
): Promise<Buffer> {
  if (isFinal) {
    return generateTTSForProvider(text, config.finalProvider);
  }
  return generateTTSForProvider(text, config.draftProvider);
}

// Usage:
const hybrid: HybridTTSConfig = {
  // Fast drafts on the small local model
  draftProvider: { provider: 'orpheus', voiceId: 'tara', modelSize: '400M' },
  finalProvider: {
    provider: 'elevenlabs',
    voiceId: 'pNInz6obpgDQGcFmaJgB',
    apiKey: process.env.ELEVENLABS_API_KEY!,
  },
};

// Iterate on script with fast local TTS
const draft = await hybridGenerate(scriptText, hybrid, false);
// ... review, adjust script ...

// Generate final with ElevenLabs quality
const final = await hybridGenerate(scriptText, hybrid, true);
1143
+ ```
1144
+
1145
+ ### Caching Generated Audio
1146
+
1147
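+ Regenerating audio for unchanged text is wasted time and money, and many models sample stochastically, so a re-run can also sound different from the take you already approved — cache aggressively to avoid regenerating unchanged segments.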
1148
+
1149
+ ```typescript
1150
+ import { createHash } from 'crypto';
1151
+ import { readFile, writeFile, access } from 'fs/promises';
1152
+ import path from 'path';
1153
+
1154
const CACHE_DIR = '/tmp/tts-cache';

/**
 * Derive a short, stable cache key for one TTS request.
 *
 * The three inputs are serialized as a JSON array before hashing so the field
 * boundaries are unambiguous. Joining them with ':' let different inputs
 * collide, e.g. voiceId "a:b" + text "c" versus voiceId "a" + text "b:c".
 * Keys differ from those produced by the ':'-joined scheme, so entries cached
 * under it are simply regenerated.
 *
 * @returns The first 16 hex characters of the SHA-256 digest.
 */
function getCacheKey(text: string, provider: string, voiceId: string): string {
  return createHash('sha256')
    .update(JSON.stringify([provider, voiceId, text]))
    .digest('hex')
    .slice(0, 16);
}
1163
+
1164
/**
 * Return cached audio for this text and voice, generating and storing it on a miss.
 *
 * The key covers every voice option that can change the audio (model size,
 * emotion, speaking rate), so a draft render is never served in place of a
 * final one. The cache directory is created on first write.
 *
 * @param text Text to synthesize.
 * @param voice Voice configuration passed to `generateTTSForProvider`.
 * @param cacheDir Directory holding cached files; defaults to `CACHE_DIR`.
 * @returns The cached or freshly generated audio.
 */
async function cachedTTS(
  text: string,
  voice: VoiceConfig,
  cacheDir: string = CACHE_DIR
): Promise<Buffer> {
  // Fold the audio-affecting options into the voice component of the key.
  const voiceVariant = [
    voice.voiceId,
    voice.modelSize ?? '',
    voice.emotion ?? '',
    voice.speakingRate ?? '',
  ].join('|');
  const key = getCacheKey(text, voice.provider, voiceVariant);
  const cachePath = path.join(cacheDir, `${key}.wav`);

  // Check cache first. Reading directly (rather than access() then readFile())
  // avoids a window in which the file can disappear between the two calls.
  try {
    const cached = await readFile(cachePath);
    console.log(`  Cache hit: ${key}`);
    return cached;
  } catch {
    // Cache miss — generate
  }

  const audio = await generateTTSForProvider(text, voice);

  // Local import: this snippet's import list does not include mkdir.
  const { mkdir } = await import('fs/promises');
  await mkdir(cacheDir, { recursive: true });
  await writeFile(cachePath, audio);
  console.log(`  Cache miss, generated: ${key}`);
  return audio;
}
1186
+ ```
1187
+
1188
+ ### Long-Form Content Chunking
1189
+
1190
+ Most TTS providers have text length limits (typically 5,000 characters). For long content, split at sentence boundaries and concatenate.
1191
+
1192
+ ```typescript
1193
/**
 * Split text into chunks at sentence boundaries, respecting max character limit.
 *
 * - Trailing text without terminal punctuation is kept as its own sentence.
 * - A sentence longer than `maxChars` is split at the last whitespace that
 *   fits, or hard-cut when it contains none, so no chunk exceeds the limit.
 *
 * @param text Text to split.
 * @param maxChars Maximum characters per chunk; must be at least 1.
 * @returns Trimmed, non-empty chunks in order; `[]` for blank input.
 * @throws RangeError if `maxChars` is less than 1 or not a number.
 */
function chunkText(text: string, maxChars: number = 4500): string[] {
  if (!(maxChars >= 1)) {
    throw new RangeError(`maxChars must be at least 1, got ${maxChars}`);
  }

  // Break one over-long sentence into pieces of at most maxChars characters.
  const splitOversized = (sentence: string): string[] => {
    const pieces: string[] = [];
    let rest = sentence.trim();
    while (rest.length > maxChars) {
      // Index of the last whitespace within the first maxChars + 1 characters.
      const lastSpace = rest.slice(0, maxChars + 1).search(/\s\S*$/);
      const cut = lastSpace > 0 ? lastSpace : maxChars;
      pieces.push(rest.slice(0, cut).trimEnd());
      rest = rest.slice(cut).trimStart();
    }
    if (rest.length > 0) {
      pieces.push(rest);
    }
    return pieces;
  };

  // `|$` lets the final sentence match even without closing punctuation.
  const sentences = text.match(/[^.!?]+(?:[.!?]+|$)/g) || [text];
  const chunks: string[] = [];
  let current = '';

  for (const sentence of sentences) {
    if (current.length + sentence.length > maxChars) {
      if (current.length > 0) {
        chunks.push(current.trim());
        current = '';
      }
      // Handle single sentences longer than maxChars
      if (sentence.length > maxChars) {
        chunks.push(...splitOversized(sentence));
        continue;
      }
    }
    current += sentence;
  }

  if (current.trim().length > 0) {
    chunks.push(current.trim());
  }

  return chunks;
}
1222
+
1223
/**
 * Generate TTS for long-form content by chunking and concatenating.
 *
 * Each chunk is synthesized through the cache, written to a temp file whose
 * name is unique to this call, then concatenated and loudness-normalized into
 * a single MP3.
 *
 * @param text Full text to narrate.
 * @param voice Voice configuration used for every chunk.
 * @param outputPath Destination MP3 path.
 * @throws Error if `text` contains nothing to synthesize, or if TTS or FFmpeg fails.
 */
async function generateLongFormTTS(
  text: string,
  voice: VoiceConfig,
  outputPath: string
): Promise<void> {
  const chunks = chunkText(text);
  if (chunks.length === 0) {
    throw new Error('No text to synthesize');
  }

  // Per-call prefix so concurrent runs do not overwrite each other's files.
  const runId = `${process.pid}-${Date.now()}`;
  const listFile = `/tmp/longform-${runId}-concat.txt`;
  const tempFiles: string[] = [];

  console.log(`Generating ${chunks.length} chunks...`);

  try {
    for (const [i, chunk] of chunks.entries()) {
      const audio = await cachedTTS(chunk, voice);
      const tempFile = `/tmp/longform-${runId}-${i.toString().padStart(4, '0')}.wav`;
      tempFiles.push(tempFile);
      await writeFile(tempFile, audio);
    }

    // Concatenate with FFmpeg
    // Inside a single-quoted concat entry, a literal quote is written as '\''.
    const listContent = tempFiles
      .map((f) => `file '${f.replace(/'/g, "'\\''")}'`)
      .join('\n');
    await writeFile(listFile, listContent);

    await execFileAsync('ffmpeg', [
      '-y', '-f', 'concat', '-safe', '0',
      '-i', listFile,
      '-af', 'loudnorm=I=-16:TP=-1.5:LRA=11',
      '-c:a', 'libmp3lame', '-b:a', '192k',
      outputPath,
    ]);
  } finally {
    // Cleanup runs on success and failure alike; missing files are ignored.
    for (const f of tempFiles) await unlink(f).catch(() => {});
    await unlink(listFile).catch(() => {});
  }
}
1259
+ ```
1260
+
1261
+ ---
1262
+
1263
+ ## 8. Provider Setup Checklists
1264
+
1265
+ ### ElevenLabs Setup
1266
+
1267
+ ```bash
1268
+ # 1. Sign up at elevenlabs.io
1269
+ # 2. Get API key from Profile → API Keys
1270
+ # 3. Install (optional — can use raw fetch)
1271
+ npm install elevenlabs # Official SDK (optional)
1272
+
1273
+ # 4. Set environment variable
1274
+ export ELEVENLABS_API_KEY=sk_xxxxxxxxxxxxxxxxxxxxxxxx
1275
+
1276
+ # 5. Test with curl
1277
+ curl -X POST "https://api.elevenlabs.io/v1/text-to-speech/pNInz6obpgDQGcFmaJgB" \
1278
+ -H "xi-api-key: $ELEVENLABS_API_KEY" \
1279
+ -H "Content-Type: application/json" \
1280
+ -d '{"text": "Hello world", "model_id": "eleven_multilingual_v2"}' \
1281
+ --output test.mp3
1282
+ ```
1283
+
1284
+ ### Orpheus TTS Setup
1285
+
1286
+ ```bash
1287
+ # 1. Install Python dependencies
1288
+ pip install orpheus-tts torch torchaudio
1289
+
1290
+ # 2. First run downloads model (~6GB for 3B)
1291
+ python -c "from orpheus_tts import OrpheusModel; m = OrpheusModel('canopyai/Orpheus-TTS-0.1-3B')"
1292
+
1293
+ # 3. Test generation
1294
+ python -c "
1295
+ from orpheus_tts import OrpheusModel
1296
+ model = OrpheusModel('canopyai/Orpheus-TTS-0.1-3B')
1297
+ audio = model.generate_speech(prompt='Hello world', voice='tara')
1298
+ audio.export('test.wav', format='wav')
1299
+ print('Success')
1300
+ "
1301
+ ```
1302
+
1303
+ ### Chatterbox Setup
1304
+
1305
+ ```bash
1306
+ # 1. Install
1307
+ pip install chatterbox-tts torch torchaudio
1308
+
1309
+ # 2. Test with emotion slider
1310
+ python -c "
1311
+ import torch
1312
+ from chatterbox.tts import ChatterboxTTS
1313
+
1314
+ model = ChatterboxTTS.from_pretrained(device='cuda' if torch.cuda.is_available() else 'cpu')
1315
+ # No reference audio = default voice
1316
+ audio = model.generate(text='Hello world', exaggeration=0.5)
1317
+ import torchaudio
1318
+ torchaudio.save('test.wav', audio, model.sr)
1319
+ print('Success')
1320
+ "
1321
+ ```
1322
+
1323
+ ### Google Cloud TTS Setup
1324
+
1325
+ ```bash
1326
+ # 1. Enable Text-to-Speech API in GCP Console
1327
+ # 2. Create service account and download JSON key
1328
+ # 3. Set credentials
1329
+ export GOOGLE_APPLICATION_CREDENTIALS=/path/to/service-account.json
1330
+
1331
+ # 4. Install SDK
1332
+ npm install @google-cloud/text-to-speech
1333
+
1334
+ # 5. Test
1335
+ npx ts-node -e "
1336
+ import { TextToSpeechClient } from '@google-cloud/text-to-speech';
1337
+ import { writeFileSync } from 'fs';
1338
+
1339
+ const client = new TextToSpeechClient();
1340
+ async function test() {
1341
+ const [response] = await client.synthesizeSpeech({
1342
+ input: { text: 'Hello world' },
1343
+ voice: { languageCode: 'en-US', name: 'en-US-Neural2-D' },
1344
+ audioConfig: { audioEncoding: 2 },
1345
+ });
1346
+ writeFileSync('test.mp3', response.audioContent as Buffer);
1347
+ console.log('Success');
1348
+ }
1349
+ test();
1350
+ "
1351
+ ```
1352
+
1353
+ ---
1354
+
1355
+ ## 9. Troubleshooting
1356
+
1357
+ ### Common Issues
1358
+
1359
+ | Problem | Cause | Fix |
1360
+ |---------|-------|-----|
1361
+ | Orpheus CUDA out of memory | Model too large for GPU | Use smaller model: 1B or 400M |
1362
+ | ElevenLabs 401 error | Invalid or expired API key | Regenerate key at elevenlabs.io |
1363
+ | Google TTS "Permission denied" | Service account missing TTS role | Add `roles/texttospeech.user` to service account |
1364
+ | Chatterbox sounds distorted | Emotion too high | Reduce `exaggeration` to 0.3-0.5 |
1365
+ | Audio has clicks at chunk boundaries | Hard cuts between segments | Add 50-100ms crossfade between chunks |
1366
+ | Inconsistent volume between speakers | No normalization | Apply EBU R128 loudnorm to each segment |
1367
+ | Bark output varies wildly | Stochastic generation | Generate 3 takes, pick best; set seed for reproducibility |
1368
+
1369
+ ### Crossfade Between Chunks (Fix Clicking)
1370
+
1371
+ ```bash
1372
+ # Add 100ms crossfade between concatenated segments
1373
+ ffmpeg -i chunk1.wav -i chunk2.wav \
1374
+ -filter_complex "[0][1]acrossfade=d=0.1:c1=tri:c2=tri" \
1375
+ output.wav
1376
+ ```
1377
+
1378
+ ### Check GPU VRAM Before Loading Models
1379
+
1380
+ ```typescript
1381
+ import { execFile } from 'child_process';
1382
+ import { promisify } from 'util';
1383
+
1384
+ const execFileAsync = promisify(execFile);
1385
+
1386
/**
 * Query free GPU memory via `nvidia-smi`.
 *
 * @returns Free VRAM in MB as reported on the first output line, or 0 when
 *   `nvidia-smi` is unavailable or its output cannot be parsed.
 */
async function getAvailableVRAM(): Promise<number> {
  try {
    const { stdout } = await execFileAsync('nvidia-smi', [
      '--query-gpu=memory.free',
      '--format=csv,noheader,nounits',
    ]);
    // Only the first output line is read.
    const [firstLine = ''] = stdout.trim().split(/\r?\n/);
    const freeMB = Number.parseInt(firstLine, 10); // MB
    // Empty or unexpected output parses to NaN; treat that as "no usable GPU"
    // so callers never see NaN.
    return Number.isNaN(freeMB) ? 0 : freeMB;
  } catch {
    return 0; // No GPU or nvidia-smi not available
  }
}

/**
 * Pick the largest Orpheus model size whose VRAM threshold is met.
 *
 * @throws Error when less than 1000 MB is free.
 */
async function selectModelSize(): Promise<'3B' | '1B' | '400M' | '150M'> {
  const vramMB = await getAvailableVRAM();

  // Thresholds in MB, largest model first.
  const tiers: ReadonlyArray<readonly [number, '3B' | '1B' | '400M' | '150M']> = [
    [8000, '3B'],
    [4000, '1B'],
    [2000, '400M'],
    [1000, '150M'],
  ];
  for (const [minMB, size] of tiers) {
    if (vramMB >= minMB) return size;
  }

  throw new Error(`Insufficient VRAM (${vramMB}MB). Minimum 1GB required for Orpheus 150M.`);
}
1408
+ ```
1409
+
1410
+ ---
1411
+
1412
+ ## 10. Research Citations
1413
+
1414
+ > **Related skills:** audio-enhancement-pipeline.md (post-processing), ffmpeg-command-generator.md (media transforms), content-repurposing-pipeline.md (sermon-to-social pipeline), transcription-pipeline-selector.md (speech-to-text, the inverse operation).