@thierrynakoa/fire-flow 12.2.1 → 13.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CREDITS.md +25 -0
- package/DOMINION-FLOW-OVERVIEW.md +182 -38
- package/README.md +399 -455
- package/TROUBLESHOOTING.md +264 -264
- package/agents/fire-debugger.md +54 -0
- package/agents/fire-executor.md +1610 -1033
- package/agents/fire-fact-checker.md +1 -1
- package/agents/fire-planner.md +85 -17
- package/agents/fire-project-researcher.md +1 -1
- package/agents/fire-researcher.md +4 -22
- package/agents/{fire-phoenix-analyst.md → fire-resurrection-analyst.md} +394 -394
- package/agents/fire-reviewer.md +552 -499
- package/agents/fire-verifier.md +114 -19
- package/bin/cli.js +18 -101
- package/commands/fire-0-orient.md +2 -2
- package/commands/fire-1a-new.md +50 -15
- package/commands/fire-1c-setup.md +33 -5
- package/commands/fire-1d-discuss.md +87 -1
- package/commands/fire-2-plan.md +556 -527
- package/commands/fire-3-execute.md +2046 -1356
- package/commands/fire-4-verify.md +975 -906
- package/commands/fire-5-handoff.md +46 -5
- package/commands/fire-6-resume.md +2 -31
- package/commands/fire-add-new-skill.md +138 -19
- package/commands/fire-autonomous.md +14 -2
- package/commands/fire-complete-milestone.md +1 -1
- package/commands/fire-cost.md +179 -183
- package/commands/fire-debug.md +1 -6
- package/commands/fire-loop-resume.md +2 -2
- package/commands/fire-loop-stop.md +1 -1
- package/commands/fire-loop.md +2 -15
- package/commands/fire-map-codebase.md +1 -1
- package/commands/fire-migrate-database.md +548 -0
- package/commands/fire-new-milestone.md +1 -1
- package/commands/fire-reflect.md +1 -2
- package/commands/fire-research.md +142 -21
- package/commands/{fire-phoenix.md → fire-resurrect.md} +859 -603
- package/commands/fire-scaffold.md +297 -0
- package/commands/fire-search.md +1 -2
- package/commands/fire-security-scan.md +483 -484
- package/commands/fire-setup.md +359 -0
- package/commands/fire-skill.md +770 -0
- package/commands/fire-skills-diff.md +506 -506
- package/commands/fire-skills-history.md +388 -388
- package/commands/fire-skills-rollback.md +7 -7
- package/commands/fire-skills-sync.md +470 -470
- package/commands/fire-test.md +5 -5
- package/commands/fire-todos.md +1 -1
- package/commands/fire-update.md +5 -5
- package/commands/fire-validate-skills.md +282 -0
- package/commands/fire-vuln-scan.md +492 -493
- package/hooks/run-hook.sh +8 -8
- package/hooks/run-session-end.sh +7 -7
- package/hooks/session-end.sh +90 -90
- package/hooks/session-start.sh +1 -1
- package/package.json +4 -24
- package/plugin.json +7 -7
- package/references/autonomy-levels.md +235 -0
- package/references/behavioral-directives.md +95 -3
- package/references/blocker-tracking.md +1 -1
- package/references/circuit-breaker.md +93 -2
- package/references/context-engineering.md +227 -9
- package/references/honesty-protocols.md +70 -1
- package/references/issue-to-pr-pipeline.md +149 -150
- package/references/metrics-and-trends.md +1 -2
- package/references/research-improvements.md +4 -108
- package/references/sdlc-mapping.md +73 -0
- package/references/state-machine.md +151 -0
- package/skills-library/AVAILABLE_TOOLS_REFERENCE.md +333 -0
- package/skills-library/SKILLS-INDEX.md +57 -558
- package/skills-library/SKILLS_LIBRARY_INDEX.md +532 -0
- package/skills-library/_general/api-patterns/api-field-name-mismatch.md +107 -0
- package/skills-library/_general/api-patterns/streaming-command-timeout.md +122 -0
- package/skills-library/_general/api-patterns/streaming-proxy-cors-bypass.md +102 -0
- package/skills-library/_general/automation/settings-gui-generator.md +172 -0
- package/skills-library/_general/database-solutions/data-type-mapping-reference.md +181 -0
- package/skills-library/_general/database-solutions/mysql-limit-offset-string-coercion.md +102 -0
- package/skills-library/_general/database-solutions/mysql-to-pg-migration.md +195 -0
- package/skills-library/_general/database-solutions/orm-schema-portability.md +193 -0
- package/skills-library/_general/database-solutions/persistent-analysis-storage.md +207 -0
- package/skills-library/_general/database-solutions/pg-to-mysql-schema-migration-methodology.md +190 -0
- package/skills-library/_general/database-solutions/sql-dialect-compatibility-matrix.md +306 -0
- package/skills-library/_general/database-solutions/sqlite-to-pg-migration.md +219 -0
- package/skills-library/_general/frontend/canvas-bubble-animation-grouping.md +270 -0
- package/skills-library/_general/frontend/color-token-migration.md +112 -0
- package/skills-library/_general/frontend/framer-motion-layoutid-grouping.md +150 -0
- package/skills-library/_general/frontend/pyqt6-settings-dialog.md +191 -0
- package/skills-library/_general/frontend/react-flow-animated-layout-switching.md +101 -0
- package/skills-library/_general/frontend/react-hooks-order-debugging.md +141 -0
- package/skills-library/_general/frontend/redux-localstorage-auth-desync.md +126 -0
- package/skills-library/_general/frontend/safari-csp-theme-color-debugging.md +124 -0
- package/skills-library/_general/frontend/safari-sw-cache-poisoning.md +138 -0
- package/skills-library/_general/frontend/svg-sparkline-no-charting-library.md +131 -0
- package/skills-library/_general/growth-marketing/oss-daily-growth-intelligence.md +224 -0
- package/skills-library/_general/integrations/claude-code-local-mcp-integration.md +250 -0
- package/skills-library/_general/integrations/mcp-composite-tool-orchestration.md +200 -0
- package/skills-library/_general/methodology/AGENT_SDK_STANDALONE_TOOLING.md +181 -0
- package/skills-library/_general/methodology/AGENT_TEAMS_GUIDE.md +169 -0
- package/skills-library/_general/methodology/ALAS_STATEFUL_EXECUTION.md +207 -0
- package/skills-library/_general/methodology/AUTO_REVIEWER_SUBAGENT.md +211 -0
- package/skills-library/_general/methodology/CONSISTENCY_CHECK_AMBIGUITY_GATE.md +96 -0
- package/skills-library/_general/methodology/DEAD_ENDS_SHELF.md +4 -4
- package/skills-library/_general/methodology/DISTILL_NOT_DUMP.md +108 -0
- package/skills-library/_general/methodology/EXECUTION_PROGRESS_MONITOR.md +157 -0
- package/skills-library/_general/methodology/HIERARCHICAL_REVIEW_MARS.md +122 -0
- package/skills-library/_general/methodology/MCP_INTER_AGENT_BRIDGE.md +207 -0
- package/skills-library/_general/methodology/MERMAID_WIZARD_DIAGRAMS.md +77 -0
- package/skills-library/_general/methodology/MISSING_DIMENSION_DETECTOR.md +89 -0
- package/skills-library/_general/methodology/MULTI_AGENT_COORDINATION.md +397 -0
- package/skills-library/_general/methodology/OBSERVATION_MASKING.md +100 -0
- package/skills-library/_general/methodology/PHOENIX_REBUILD_METHODOLOGY.md +82 -11
- package/skills-library/_general/methodology/REVIEW_BACKTRACK_PANEL.md +140 -0
- package/skills-library/_general/methodology/REVIEW_FIX_LOOP.md +117 -0
- package/skills-library/_general/methodology/VOTING_VERDICT_ARBITRATION.md +155 -0
- package/skills-library/_general/methodology/ZERO_FRICTION_CLI_SETUP.md +2 -2
- package/skills-library/_general/methodology/dead-code-activation.md +123 -0
- package/skills-library/_general/methodology/debug-swarm-researcher-escape-hatch.md +240 -240
- package/skills-library/_general/methodology/shell-autonomous-loop-fixplan.md +1 -1
- package/skills-library/_general/patterns-standards/GOF_DESIGN_PATTERNS_FOR_AI_AGENTS.md +5 -5
- package/skills-library/_general/patterns-standards/cascading-failure-diagnosis.md +119 -0
- package/skills-library/_general/patterns-standards/domain-specific-layout-algorithms.md +209 -0
- package/skills-library/_general/patterns-standards/python-desktop-app-architecture.md +399 -0
- package/skills-library/_general/patterns-standards/realtime-monitoring-dashboard.md +457 -0
- package/skills-library/_general/patterns-standards/togglable-processing-pipeline.md +169 -0
- package/skills-library/_general/performance/liveclock-extraction.md +112 -0
- package/skills-library/_general/performance/ref-based-canvas-animation.md +117 -0
- package/skills-library/_general/performance/use-visible-interval.md +131 -0
- package/skills-library/_general/testing/playwright-firefox-withcredentials-auth-issue.md +104 -0
- package/skills-library/_quarantine/README.md +30 -0
- package/skills-library/api-patterns/BROADCAST_SCHEDULER_SHARED_EXECUTE_FUNCTION.md +150 -0
- package/skills-library/api-patterns/ERROR_RESPONSE_STANDARDS.md +145 -0
- package/skills-library/api-patterns/EXPRESS_ROUTE_ORDERING_MIDDLEWARE_INTERCEPTION.md +326 -0
- package/skills-library/api-patterns/PAGINATION_PATTERNS.md +137 -0
- package/skills-library/api-patterns/PODCAST_PROGRESS_TRACKING_THREE_ROOT_CAUSES.md +277 -0
- package/skills-library/api-patterns/RATE_LIMITING_TOGGLE.md +155 -0
- package/skills-library/api-patterns/graphql-content-queries.md +708 -0
- package/skills-library/appointment-scheduler-design.md +423 -0
- package/skills-library/automation/AUTO_POPULATE_COMPLETE_GUIDE.md +631 -0
- package/skills-library/automation/CC_WORKFLOW_STUDIO.md +83 -0
- package/skills-library/automation/CLAUDE_CODE_SWARM_MODE.md +95 -0
- package/skills-library/automation/DAEMON_TRIGGER_FILE_IPC.md +195 -0
- package/skills-library/automation/scheduled-content-publishing.md +608 -0
- package/skills-library/awesome-workflows/Blogging-Platform-Instructions/view_commands.md +25 -0
- package/skills-library/awesome-workflows/CREDENTIAL-SECURITY-WORKFLOW.md +109 -0
- package/skills-library/awesome-workflows/DEBUGGING-WORKFLOW.md +124 -0
- package/skills-library/awesome-workflows/Design-Review-Workflow/README.md +31 -0
- package/skills-library/awesome-workflows/Design-Review-Workflow/design-principles-example.md +129 -0
- package/skills-library/awesome-workflows/Design-Review-Workflow/design-review-agent.md +107 -0
- package/skills-library/awesome-workflows/Design-Review-Workflow/design-review-claude-md-snippet.md +24 -0
- package/skills-library/awesome-workflows/Design-Review-Workflow/design-review-slash-command.md +38 -0
- package/skills-library/awesome-workflows/PARALLEL-RESEARCH-WORKFLOW.md +89 -0
- package/skills-library/awesome-workflows/PHASE-EXECUTION-WORKFLOW.md +97 -0
- package/skills-library/awesome-workflows/SESSION-HANDOFF-WORKFLOW.md +116 -0
- package/skills-library/cms-patterns/content-branch-preview.md +515 -0
- package/skills-library/cms-patterns/inline-visual-editing.md +666 -0
- package/skills-library/cms-patterns/mdx-component-content.md +649 -0
- package/skills-library/cms-patterns/media-manager-abstraction.md +827 -0
- package/skills-library/cms-patterns/schema-driven-form-generator.md +838 -0
- package/skills-library/complexity-metrics/complexity-divider.md +707 -0
- package/skills-library/complexity-metrics/work-with-complexity.md +193 -0
- package/skills-library/creative-multimedia/animation-stack-guide.md +577 -0
- package/skills-library/creative-multimedia/audio-enhancement-pipeline.md +625 -0
- package/skills-library/creative-multimedia/content-repurposing-pipeline.md +1146 -0
- package/skills-library/creative-multimedia/data-visualization-generator.md +862 -0
- package/skills-library/creative-multimedia/doc-to-podcast-pipeline.md +2184 -0
- package/skills-library/creative-multimedia/ffmpeg-command-generator.md +405 -0
- package/skills-library/creative-multimedia/image-optimization-pipeline.md +605 -0
- package/skills-library/creative-multimedia/multi-format-content-generator.md +1759 -0
- package/skills-library/creative-multimedia/og-image-generator.md +635 -0
- package/skills-library/creative-multimedia/podcast-audio-composition.md +1355 -0
- package/skills-library/creative-multimedia/podcast-quality-evaluation.md +1452 -0
- package/skills-library/creative-multimedia/podcast-script-generation.md +1841 -0
- package/skills-library/creative-multimedia/svg-generation.md +750 -0
- package/skills-library/creative-multimedia/text-to-speech-provider-selector.md +1414 -0
- package/skills-library/creative-multimedia/transcription-pipeline-selector.md +677 -0
- package/skills-library/creative-multimedia/video-streaming-setup.md +559 -0
- package/skills-library/database-solutions/AI_RESPONSE_DATABASE_CACHING.md +520 -0
- package/skills-library/database-solutions/CONDITIONAL_SQL_MIGRATION_PATTERN.md +119 -0
- package/skills-library/database-solutions/DATABASE_COLUMN_NAME_MISMATCH.md +393 -0
- package/skills-library/database-solutions/DATABASE_SCHEMA.md +394 -0
- package/skills-library/database-solutions/DATABASE_SCHEMA_VERIFICATION_GUIDE.md +348 -0
- package/skills-library/database-solutions/DATABASE_STRATEGY.md +71 -0
- package/skills-library/database-solutions/ES_MODULE_SEED_SCRIPT_PATTERN.md +52 -0
- package/skills-library/database-solutions/MIGRATION_GUIDE.md +3 -0
- package/skills-library/database-solutions/PLPGSQL_VARIABLE_CONFLICT_FIX.md +208 -0
- package/skills-library/database-solutions/POSTGRESQL_JSONB_DOUBLE_STRINGIFY_FIX.md +245 -0
- package/skills-library/database-solutions/POSTGRESQL_LICENSE_TABLE_DESIGN.md +393 -0
- package/skills-library/database-solutions/POSTGRESQL_UUID_DOCUMENT_RAG_DUAL_SCOPE.md +732 -0
- package/skills-library/database-solutions/POSTGRES_SQL_TEMPLATE_BINDING_ERROR.md +240 -0
- package/skills-library/database-solutions/PRISMA_DB_PUSH_DATA_LOSS_PREVENTION.md +141 -0
- package/skills-library/database-solutions/PRODUCTION_QUERY_OPTIMIZATION_RESTART_FIX.md +389 -0
- package/skills-library/database-solutions/RLS_SECURITY_GUIDE.md +107 -0
- package/skills-library/database-solutions/SCHEMA_ENHANCEMENTS_GUIDE.md +373 -0
- package/skills-library/database-solutions/SCHEMA_MIGRATION_GUIDE.md +368 -0
- package/skills-library/database-solutions/SCHEMA_VERIFICATION_QUICK_REFERENCE.md +104 -0
- package/skills-library/database-solutions/ai-erd-generator.md +1213 -0
- package/skills-library/database-solutions/content-publishing-states.md +631 -0
- package/skills-library/database-solutions/database-schema-designer.md +522 -0
- package/skills-library/database-solutions/er-diagram-components.md +569 -0
- package/skills-library/database-solutions/er-to-ddl-mapping.md +1405 -0
- package/skills-library/database-solutions/erd-creator-textbook-research.md +433 -0
- package/skills-library/database-solutions/erd-react-flow-architecture.md +1965 -0
- package/skills-library/database-solutions/mariadb-aggregate-function-replacement.md +145 -0
- package/skills-library/database-solutions/normalization-validator.md +778 -0
- package/skills-library/database-solutions/postgres-full-text-search-content.md +494 -0
- package/skills-library/database-solutions/postgresql-to-mysql-runtime-translation.md +286 -0
- package/skills-library/database-solutions/regex-alternation-ordering-sql-types.md +92 -0
- package/skills-library/database-solutions/reserved-word-context-aware-quoting.md +142 -0
- package/skills-library/database-solutions/sql-ddl-generator.md +756 -0
- package/skills-library/database-solutions/supabase-connection-pooler-fix.md +102 -0
- package/skills-library/deployment-security/CPANEL_NODE_DEPLOYMENT.md +166 -0
- package/skills-library/deployment-security/DEPLOYMENT.md +275 -0
- package/skills-library/deployment-security/DEPLOYMENT_CHECKLIST.md +363 -0
- package/skills-library/deployment-security/DEPLOYMENT_PLAN.md +669 -0
- package/skills-library/deployment-security/KNEX_DATABASE_ABSTRACTION.md +444 -0
- package/skills-library/deployment-security/LICENSE_KEY_SYSTEM.md +206 -0
- package/skills-library/deployment-security/NODE18_DEPENDENCY_COMPATIBILITY.md +284 -0
- package/skills-library/deployment-security/PHP_INSTALLER_WIZARD_GUIDE.md +315 -0
- package/skills-library/deployment-security/PM2_ENVIRONMENT_VARIABLE_CACHING.md +256 -0
- package/skills-library/deployment-security/PM2_MEMORY_EXHAUSTION_FIX.md +370 -0
- package/skills-library/deployment-security/PRODUCTION_DEPLOYMENT_GUIDE.md +592 -0
- package/skills-library/deployment-security/PRODUCTION_HARDENING_DOCUMENTATION.md +307 -0
- package/skills-library/deployment-security/PRODUCTION_RECOVERY_CHERRY_PICK_PATTERN.md +202 -0
- package/skills-library/deployment-security/PYINSTALLER_CUDA_WHISPER_BUNDLING.md +236 -0
- package/skills-library/deployment-security/SECURITY.md +41 -0
- package/skills-library/deployment-security/SMTP_SSL_HOSTNAME_MISMATCH_SHARED_HOSTING.md +220 -0
- package/skills-library/deployment-security/SPA_SEO_OPTIMIZATION_CPANEL.md +200 -0
- package/skills-library/deployment-security/SUPABASE_EDGE_FUNCTIONS.md +338 -0
- package/skills-library/deployment-security/VERCEL_GITHUB_DEPLOYMENT_GUIDE.md +858 -0
- package/skills-library/deployment-security/VPS_DEPLOYMENT_READINESS.md +356 -0
- package/skills-library/deployment-security/deployment-changes-not-applying.md +241 -0
- package/skills-library/deployment-security/env-file-management-production-local.md +203 -0
- package/skills-library/deployment-security/express-secure-file-downloads.md +413 -0
- package/skills-library/deployment-security/react-production-deployment-desktop-guide.md +2011 -0
- package/skills-library/deployment-security/self-hosted-supabase-coolify-guide.md +1684 -0
- package/skills-library/deployment-security/unique-features-ai-strategy-plaid-security.md +1613 -0
- package/skills-library/deployment-security/vps-deployment.md +135 -0
- package/skills-library/document-processing/WORD_EXPORT_MARKDOWN_FORMATTING.md +482 -0
- package/skills-library/document-processing/document-ai-landingai-integration.md +677 -0
- package/skills-library/document-processing/express-secure-file-downloads-mern.md +413 -0
- package/skills-library/document-processing/express-secure-file-downloads.md +413 -0
- package/skills-library/document-processing/md-to-word-converter.md +318 -0
- package/skills-library/document-processing/pdf-forms-integration/README.md +101 -0
- package/skills-library/document-processing/pdf-forms-integration/SKILL.md +662 -0
- package/skills-library/ecommerce/ADMIN_PRODUCTS_GUIDE.md +428 -0
- package/skills-library/ecommerce/ECOMMERCE_API_REFERENCE.md +776 -0
- package/skills-library/ecommerce/ECOMMERCE_COMPLETION_SUMMARY.md +673 -0
- package/skills-library/ecommerce/ECOMMERCE_IMPLEMENTATION_GUIDE.md +729 -0
- package/skills-library/ecommerce/ECOMMERCE_QUICK_REFERENCE.md +521 -0
- package/skills-library/ecommerce/ECOMMERCE_TESTING_CHECKLIST.md +565 -0
- package/skills-library/ecommerce/ECOMMERCE_WORKFLOW_GUIDE.md +1059 -0
- package/skills-library/ecommerce/PRODUCT_CREATION_EXPANDED.md +522 -0
- package/skills-library/ecommerce/agentic-commerce-protocol.md +203 -0
- package/skills-library/ecommerce/cart-abandonment-recovery.md +236 -0
- package/skills-library/ecommerce/cart-architecture-patterns.md +300 -0
- package/skills-library/ecommerce/cart-item-count-indicator.md +264 -0
- package/skills-library/ecommerce/checkout-ux-conversion.md +227 -0
- package/skills-library/ecommerce/composable-commerce-selection.md +166 -0
- package/skills-library/ecommerce/ecommerce-analytics-patterns.md +167 -0
- package/skills-library/ecommerce/fraud-detection-patterns.md +179 -0
- package/skills-library/ecommerce/inventory-stock-management.md +270 -0
- package/skills-library/ecommerce/order-saga-state-machine.md +336 -0
- package/skills-library/ecommerce/payment-provider-abstraction.md +245 -0
- package/skills-library/ecommerce/pci-compliance-checklist.md +192 -0
- package/skills-library/ecommerce/refund-chargeback-handling.md +177 -0
- package/skills-library/ecommerce/shipping-carrier-integration.md +218 -0
- package/skills-library/ecommerce/webhook-idempotency-patterns.md +253 -0
- package/skills-library/excalidraw-diagrams/.github/workflows/ci.yml +558 -0
- package/skills-library/excalidraw-diagrams/.github/workflows/prompt-gallery.yml +448 -0
- package/skills-library/excalidraw-diagrams/.github/workflows/release.yml +42 -0
- package/skills-library/excalidraw-diagrams/.github/workflows/test-reusable-ci.yml +25 -0
- package/skills-library/excalidraw-diagrams/CLAUDE.md +57 -0
- package/skills-library/excalidraw-diagrams/LICENSE +21 -0
- package/skills-library/excalidraw-diagrams/README.md +178 -0
- package/skills-library/excalidraw-diagrams/SKILL.md +715 -0
- package/skills-library/form-solutions/BUTTON_TYPE_FORM_SUBMISSION.md +336 -0
- package/skills-library/form-solutions/FILLABLE_PDF_IMPLEMENTATION.md +226 -0
- package/skills-library/form-solutions/SURVEYJS_QUESTIONNAIRE_SYSTEM.md +367 -0
- package/skills-library/form-solutions/tiptap-minimal-setup.md +690 -0
- package/skills-library/frontend/scholarly-classification-bubble-map.md +149 -0
- package/skills-library/infrastructure/ci-cd-pipeline-builder.md +517 -0
- package/skills-library/infrastructure/observability-designer.md +264 -0
- package/skills-library/infrastructure/performance-profiler.md +621 -0
- package/skills-library/installer-wizard-patterns.md +249 -0
- package/skills-library/integrations/CLAUDE_CODE_TOKEN_ANALYTICS.md +160 -0
- package/skills-library/integrations/CONFIGURABLE_AI_PROVIDER_SELECTION.md +728 -0
- package/skills-library/integrations/SOCKET_IO_BROADCAST_ALL_VS_ROOM.md +141 -0
- package/skills-library/integrations/VIRTUAL_MEETINGS_IMPLEMENTATION.md +374 -0
- package/skills-library/integrations/WORDPRESS_LEARNDASH_DATA_RECOVERY.md +53 -0
- package/skills-library/integrations/YOUTUBE_API_SETUP.md +141 -0
- package/skills-library/integrations/YOUTUBE_BOOKMARKING_EXPLANATION.md +252 -0
- package/skills-library/integrations/YOUTUBE_BOOKMARKING_SOLUTION.md +268 -0
- package/skills-library/integrations/YOUTUBE_OAUTH_SETUP_GUIDE.md +200 -0
- package/skills-library/integrations/YOUTUBE_VIDEO_FIX_COMPLETE.md +192 -0
- package/skills-library/integrations/ai-ml/GEMINI_AI_RAG_PIPELINE_COMPLETE_GUIDE.md +195 -0
- package/skills-library/integrations/ai-ml/GEMINI_IMAGE_GENERATION_SETUP.md +64 -0
- package/skills-library/integrations/cloudflare/cloudflare-turnstile-debugging.md +202 -0
- package/skills-library/integrations/cloudflare/cloudflare-turnstile-implementation.md +476 -0
- package/skills-library/integrations/cloudflare-turnstile-debugging.md +202 -0
- package/skills-library/integrations/cloudflare-turnstile-implementation.md +476 -0
- package/skills-library/integrations/ghost-creator-monetization-pattern.md +454 -0
- package/skills-library/integrations/headless-cms-architecture.md +484 -0
- package/skills-library/integrations/headless-cms-stack-selection.md +183 -0
- package/skills-library/integrations/payload-cms-patterns.md +674 -0
- package/skills-library/integrations/realtimestt-openwakeword-cuda-windows.md +229 -0
- package/skills-library/integrations/rss-podcast-integration.md +300 -0
- package/skills-library/integrations/wordpress/WORDPRESS_LEARNDASH_DATA_RECOVERY.md +53 -0
- package/skills-library/integrations/youtube/YOUTUBE_API_SETUP.md +141 -0
- package/skills-library/integrations/youtube/YOUTUBE_BOOKMARKING_EXPLANATION.md +252 -0
- package/skills-library/integrations/youtube/YOUTUBE_BOOKMARKING_SOLUTION.md +268 -0
- package/skills-library/integrations/youtube/YOUTUBE_OAUTH_SETUP_GUIDE.md +200 -0
- package/skills-library/integrations/youtube/YOUTUBE_VIDEO_FIX_COMPLETE.md +192 -0
- package/skills-library/marketing/campaign-analytics.md +97 -0
- package/skills-library/marketing/content-creator.md +105 -0
- package/skills-library/marketing/marketing-strategy-pmm.md +94 -0
- package/skills-library/marketing/social-media-analyzer.md +81 -0
- package/skills-library/methodology/ADVANCED_ORCHESTRATION_PATTERNS.md +401 -0
- package/skills-library/methodology/AGENT_SELF_IMPROVEMENT_LOOP.md +179 -0
- package/skills-library/methodology/BREATH_BASED_PARALLEL_EXECUTION.md +1 -1
- package/skills-library/methodology/CLEANSING_CYCLE.md +358 -0
- package/skills-library/methodology/CONFIDENCE_ANNOTATION_PATTERN.md +143 -0
- package/skills-library/methodology/CRITICAL_PATTERNS_DOCUMENTATION_COMPLETE.md +204 -0
- package/skills-library/methodology/DELIVERABLES_SUMMARY.md +341 -0
- package/skills-library/methodology/DIFFICULTY_AWARE_AGENT_ROUTING.md +252 -0
- package/skills-library/methodology/EVOLUTIONARY_SKILL_SYNTHESIS.md +219 -0
- package/skills-library/methodology/GLOMERULUS_DECISION_GATE.md +223 -0
- package/skills-library/methodology/HIBERNATION_SYSTEM.md +231 -0
- package/skills-library/methodology/INSTRUMENTATION_OVER_RESTRICTION.md +192 -0
- package/skills-library/methodology/MASTER_COMPLETION_SUMMARY.md +444 -0
- package/skills-library/methodology/MASTER_SESSION_COMPLETION.md +743 -0
- package/skills-library/methodology/MERN_QUICK_REFERENCE.md +358 -0
- package/skills-library/methodology/ORGAN_AGENT_MAPPING.md +177 -0
- package/skills-library/methodology/PARALLEL_WAVE_BASED_REFACTORING.md +440 -0
- package/skills-library/methodology/QUICK_REFERENCE.md +358 -0
- package/skills-library/methodology/SDFT_ONPOLICY_SELF_DISTILLATION.md +186 -0
- package/skills-library/methodology/SELF_QUESTIONING_TASK_GENERATION.md +270 -0
- package/skills-library/methodology/SESSION_COMPLETION_SUMMARY.md +304 -0
- package/skills-library/methodology/SESSION_SUMMARY.md +432 -0
- package/skills-library/methodology/WARRIOR_WORKFLOW_DEBUGGING_PROTOCOL.md +252 -0
- package/skills-library/methodology/tech-debt-tracker.md +570 -0
- package/skills-library/parallel-debug/SKILL.md +60 -0
- package/skills-library/patterns-standards/API_PATTERN_FIX_SUMMARY.md +236 -0
- package/skills-library/patterns-standards/BATCH_OPERATIONS_WITH_PROGRESS_MODAL.md +362 -0
- package/skills-library/patterns-standards/CRITICAL_CODING_PATTERNS.md +639 -0
- package/skills-library/patterns-standards/DARK_MODE_MODAL_VISIBILITY.md +258 -0
- package/skills-library/patterns-standards/ERROR_RESILIENCE_IMPLEMENTATION.md +375 -0
- package/skills-library/patterns-standards/ES_MODULE_IMPORT_HOISTING_DOTENV.md +298 -0
- package/skills-library/patterns-standards/NESTED_BACKDROP_FILTER_CSS_ARTIFACT_FIX.md +76 -0
- package/skills-library/patterns-standards/ORDERED_DETECTOR_PIPELINE_GRACEFUL_FALLBACK.md +333 -0
- package/skills-library/patterns-standards/PHASE_IMPORT_ERROR_DEBUGGING.md +271 -0
- package/skills-library/patterns-standards/PYNPUT_GLOBAL_HOTKEY_VK_MATCHING.md +252 -0
- package/skills-library/patterns-standards/REACT_USEEFFECT_CASCADE_RESET_FIX.md +132 -0
- package/skills-library/patterns-standards/SUBMENU_HOVER_DROPDOWN_PATTERN.md +225 -0
- package/skills-library/patterns-standards/TAILWIND_TEXT_VISIBILITY_OVERRIDE.md +322 -0
- package/skills-library/patterns-standards/THEME_AWARE_CSS_VARIABLES_PATTERN.md +209 -0
- package/skills-library/patterns-standards/THEME_USER_OBJECT_PROPERTY_NAMING.md +194 -0
- package/skills-library/patterns-standards/TOOLTIP_BLOCKING_CLICKS_FIX.md +267 -0
- package/skills-library/patterns-standards/claude-code-plugin-structure.md +235 -0
- package/skills-library/patterns-standards/react-i18next-setup.md +429 -0
- package/skills-library/patterns-standards/thesys-c1-generative-ui-integration.md +967 -0
- package/skills-library/plugin-development/CLAUDE_CODE_COMMAND_REGISTRATION_SILENT_FAILURE.md +315 -0
- package/skills-library/plugin-development/plugin-command-namespace-vs-global.md +390 -0
- package/skills-library/plugin-development/plugin-doc-auto-generation.md +172 -0
- package/skills-library/security/GITHUB_REPO_SECURITY_AUDIT.md +115 -0
- package/skills-library/security/admin-deletion-safety.md +396 -0
- package/skills-library/security/application-vuln-patterns.md +477 -0
- package/skills-library/security/env-secrets-manager.md +686 -0
- package/skills-library/security/secure-ai-application-templates.md +347 -0
- package/skills-library/security/sql-injection-prevention-postgresjs.md +151 -0
- package/skills-library/supabase-connection-pooler-fix.md +102 -0
- package/skills-library/system-context/POWERSHELL_BASH_INTEROP.md +82 -0
- package/skills-library/system-context/SERVICE_LIFECYCLE_MANAGEMENT.md +119 -0
- package/skills-library/system-context/SKILL.md +40 -0
- package/skills-library/system-context/WINDOWS_DEV_ENVIRONMENT.md +73 -0
- package/skills-library/testing/E2E_PLAYWRIGHT_PATTERNS.md +99 -0
- package/skills-library/testing/INTEGRATION_TEST_STRATEGY.md +82 -0
- package/skills-library/testing/RED_GREEN_BUGFIX_GATE.md +203 -0
- package/skills-library/testing/TEST_DATA_MANAGEMENT.md +69 -0
- package/skills-library/testing/VITEST_UNIT_TEST_PATTERNS.md +75 -0
- package/skills-library/testing/playwright-api-security-tests.md +202 -0
- package/skills-library/toolbox/SKILL.md +84 -0
- package/skills-library/toolbox/code-graph-and-web-scraping-mcps.md +237 -0
- package/skills-library/ui-ux-pro-max/ACCESSIBILITY_ESSENTIALS.md +115 -0
- package/skills-library/ui-ux-pro-max/DESIGN_SYSTEM_SCAFFOLDING.md +133 -0
- package/skills-library/ui-ux-pro-max/RESPONSIVE_LAYOUT_PATTERNS.md +119 -0
- package/skills-library/ui-ux-pro-max/SKILL.md +386 -0
- package/skills-library/ui-ux-pro-max/data/charts.csv +26 -0
- package/skills-library/ui-ux-pro-max/data/colors.csv +97 -0
- package/skills-library/ui-ux-pro-max/data/icons.csv +101 -0
- package/skills-library/ui-ux-pro-max/data/landing.csv +31 -0
- package/skills-library/ui-ux-pro-max/data/products.csv +97 -0
- package/skills-library/ui-ux-pro-max/data/react-performance.csv +45 -0
- package/skills-library/ui-ux-pro-max/data/stacks/astro.csv +54 -0
- package/skills-library/ui-ux-pro-max/data/stacks/flutter.csv +53 -0
- package/skills-library/ui-ux-pro-max/data/stacks/html-tailwind.csv +56 -0
- package/skills-library/ui-ux-pro-max/data/stacks/jetpack-compose.csv +53 -0
- package/skills-library/ui-ux-pro-max/data/stacks/nextjs.csv +53 -0
- package/skills-library/ui-ux-pro-max/data/stacks/nuxt-ui.csv +51 -0
- package/skills-library/ui-ux-pro-max/data/stacks/nuxtjs.csv +59 -0
- package/skills-library/ui-ux-pro-max/data/stacks/react-native.csv +52 -0
- package/skills-library/ui-ux-pro-max/data/stacks/react.csv +54 -0
- package/skills-library/ui-ux-pro-max/data/stacks/shadcn.csv +61 -0
- package/skills-library/ui-ux-pro-max/data/stacks/svelte.csv +54 -0
- package/skills-library/ui-ux-pro-max/data/stacks/swiftui.csv +51 -0
- package/skills-library/ui-ux-pro-max/data/stacks/vue.csv +50 -0
- package/skills-library/ui-ux-pro-max/data/styles.csv +68 -0
- package/skills-library/ui-ux-pro-max/data/typography.csv +58 -0
- package/skills-library/ui-ux-pro-max/data/ui-reasoning.csv +101 -0
- package/skills-library/ui-ux-pro-max/data/ux-guidelines.csv +100 -0
- package/skills-library/ui-ux-pro-max/data/web-interface.csv +31 -0
- package/skills-library/wordpress-style-theme-components.md +1526 -0
- package/templates/ASSUMPTIONS.md +1 -1
- package/templates/DECISION_LOG.md +0 -1
- package/templates/phase-prompt.md +1 -1
- package/templates/phoenix-comparison.md +6 -6
- package/templates/skill-api-integration.md +106 -0
- package/templates/skill-architecture-pattern.md +92 -0
- package/templates/skill-debug-pattern.md +98 -0
- package/templates/skill-devops-recipe.md +107 -0
- package/templates/skill-general.md +65 -0
- package/templates/skill-ui-component.md +113 -0
- package/tools/uat-runner.py +179 -0
- package/version.json +7 -3
- package/workflows/handoff-session.md +2 -2
- package/workflows/new-project.md +2 -2
- package/workflows/plan-phase.md +1 -1
- package/.claude-plugin/plugin.json +0 -64
- package/skills-library/_general/methodology/LIVE_BREADCRUMB_PROTOCOL.md +0 -242
- package/skills-library/_general/methodology/llm-judge-memory-crud.md +0 -241
- package/skills-library/methodology/REFLEXION_MEMORY_PATTERN.md +0 -183
- package/skills-library/methodology/RESEARCH_BACKED_WORKFLOW_UPGRADE.md +0 -263
- package/skills-library/methodology/SABBATH_REST_PATTERN.md +0 -267
- package/skills-library/methodology/STONE_AND_SCAFFOLD.md +0 -220
- package/skills-library/specialists/api-architecture/api-designer.md +0 -49
- package/skills-library/specialists/api-architecture/graphql-architect.md +0 -49
- package/skills-library/specialists/api-architecture/mcp-developer.md +0 -51
- package/skills-library/specialists/api-architecture/microservices-architect.md +0 -50
- package/skills-library/specialists/api-architecture/websocket-engineer.md +0 -48
- package/skills-library/specialists/backend/django-expert.md +0 -52
- package/skills-library/specialists/backend/fastapi-expert.md +0 -52
- package/skills-library/specialists/backend/laravel-specialist.md +0 -52
- package/skills-library/specialists/backend/nestjs-expert.md +0 -51
- package/skills-library/specialists/backend/rails-expert.md +0 -53
- package/skills-library/specialists/backend/spring-boot-engineer.md +0 -56
- package/skills-library/specialists/data-ml/fine-tuning-expert.md +0 -48
- package/skills-library/specialists/data-ml/ml-pipeline.md +0 -47
- package/skills-library/specialists/data-ml/pandas-pro.md +0 -47
- package/skills-library/specialists/data-ml/rag-architect.md +0 -51
- package/skills-library/specialists/data-ml/spark-engineer.md +0 -47
- package/skills-library/specialists/frontend/angular-architect.md +0 -52
- package/skills-library/specialists/frontend/flutter-expert.md +0 -51
- package/skills-library/specialists/frontend/nextjs-developer.md +0 -54
- package/skills-library/specialists/frontend/react-native-expert.md +0 -50
- package/skills-library/specialists/frontend/vue-expert.md +0 -51
- package/skills-library/specialists/infrastructure/chaos-engineer.md +0 -74
- package/skills-library/specialists/infrastructure/cloud-architect.md +0 -70
- package/skills-library/specialists/infrastructure/database-optimizer.md +0 -64
- package/skills-library/specialists/infrastructure/devops-engineer.md +0 -70
- package/skills-library/specialists/infrastructure/kubernetes-specialist.md +0 -52
- package/skills-library/specialists/infrastructure/monitoring-expert.md +0 -70
- package/skills-library/specialists/infrastructure/sre-engineer.md +0 -70
- package/skills-library/specialists/infrastructure/terraform-engineer.md +0 -51
- package/skills-library/specialists/languages/cpp-pro.md +0 -74
- package/skills-library/specialists/languages/csharp-developer.md +0 -69
- package/skills-library/specialists/languages/dotnet-core-expert.md +0 -54
- package/skills-library/specialists/languages/golang-pro.md +0 -51
- package/skills-library/specialists/languages/java-architect.md +0 -49
- package/skills-library/specialists/languages/javascript-pro.md +0 -68
- package/skills-library/specialists/languages/kotlin-specialist.md +0 -68
- package/skills-library/specialists/languages/php-pro.md +0 -49
- package/skills-library/specialists/languages/python-pro.md +0 -52
- package/skills-library/specialists/languages/react-expert.md +0 -51
- package/skills-library/specialists/languages/rust-engineer.md +0 -50
- package/skills-library/specialists/languages/sql-pro.md +0 -56
- package/skills-library/specialists/languages/swift-expert.md +0 -69
- package/skills-library/specialists/languages/typescript-pro.md +0 -51
- package/skills-library/specialists/platform/atlassian-mcp.md +0 -52
- package/skills-library/specialists/platform/embedded-systems.md +0 -53
- package/skills-library/specialists/platform/game-developer.md +0 -53
- package/skills-library/specialists/platform/salesforce-developer.md +0 -53
- package/skills-library/specialists/platform/shopify-expert.md +0 -49
- package/skills-library/specialists/platform/wordpress-pro.md +0 -49
- package/skills-library/specialists/quality/code-documenter.md +0 -51
- package/skills-library/specialists/quality/code-reviewer.md +0 -67
- package/skills-library/specialists/quality/debugging-wizard.md +0 -51
- package/skills-library/specialists/quality/fullstack-guardian.md +0 -51
- package/skills-library/specialists/quality/legacy-modernizer.md +0 -50
- package/skills-library/specialists/quality/playwright-expert.md +0 -65
- package/skills-library/specialists/quality/spec-miner.md +0 -56
- package/skills-library/specialists/quality/test-master.md +0 -65
- package/skills-library/specialists/security/secure-code-guardian.md +0 -55
- package/skills-library/specialists/security/security-reviewer.md +0 -53
- package/skills-library/specialists/workflow/architecture-designer.md +0 -53
- package/skills-library/specialists/workflow/cli-developer.md +0 -70
- package/skills-library/specialists/workflow/feature-forge.md +0 -65
- package/skills-library/specialists/workflow/prompt-engineer.md +0 -54
- package/skills-library/specialists/workflow/the-fool.md +0 -62
- /package/skills-library/{performance → _general/performance}/cache-augmented-generation.md +0 -0
- /package/skills-library/{debugging → parallel-debug}/FAILURE_TAXONOMY_CLASSIFICATION.md +0 -0
- /package/skills-library/{debugging → parallel-debug}/THREE_AGENT_HYPOTHESIS_DEBUGGING.md +0 -0
|
@@ -0,0 +1,677 @@
|
|
|
1
|
+
# Transcription Pipeline Selector
|
|
2
|
+
## Description
|
|
3
|
+
|
|
4
|
+
Choose and integrate the right speech-to-text service for any project. Covers local (Whisper), streaming (Deepgram), intelligence-rich (AssemblyAI), and ecosystem-aligned (Gemini) providers with working integration code, pre/post-processing pipelines, and cost modeling.
|
|
5
|
+
|
|
6
|
+
## When to Use
|
|
7
|
+
|
|
8
|
+
- Adding voice input, transcription, or captioning to an application
|
|
9
|
+
- Processing audio/video files into searchable text
|
|
10
|
+
- Building voice agents or real-time transcription features
|
|
11
|
+
- Migrating between STT providers
|
|
12
|
+
- Estimating transcription costs for a project proposal
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Decision Matrix
|
|
17
|
+
|
|
18
|
+
| Provider | Speed | Accuracy | Price/min | Best For | Node SDK |
|
|
19
|
+
|----------|-------|----------|-----------|----------|----------|
|
|
20
|
+
| Whisper Turbo (local) | 6x realtime | ~96% | $0 (GPU needed) | Privacy, offline, bulk | whisper-node |
|
|
21
|
+
| Deepgram Nova-3 | Sub-300ms | Best streaming | $0.0043 | Real-time, voice agents | @deepgram/sdk |
|
|
22
|
+
| AssemblyAI | Good | 96%+ | $0.0025 | Intelligence features | assemblyai |
|
|
23
|
+
| Google Gemini | Good | 95%+ | Token-based | Already using Gemini | @google/generative-ai |
|
|
24
|
+
| OpenAI gpt-4o-transcribe | Fast | Lowest WER | $0.006 | OpenAI ecosystem | openai |
|
|
25
|
+
|
|
26
|
+
### Quick Decision Flow
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
Need real-time streaming? --> Deepgram Nova-3
|
|
30
|
+
Need speaker labels + summaries + sentiment? --> AssemblyAI
|
|
31
|
+
Need privacy / no API calls? --> Whisper Turbo (local)
|
|
32
|
+
Already using Gemini for other features? --> Gemini Audio
|
|
33
|
+
Budget is primary concern for batch? --> AssemblyAI ($0.0025/min)
|
|
34
|
+
```
|
|
35
|
+
|
|
36
|
+
---
|
|
37
|
+
|
|
38
|
+
## Integration Code
|
|
39
|
+
|
|
40
|
+
### 1. Whisper Local (faster-whisper via Python subprocess)
|
|
41
|
+
|
|
42
|
+
The most practical local approach uses faster-whisper (CTranslate2 backend). Call from Node via subprocess.
|
|
43
|
+
|
|
44
|
+
**Install (Python side):**
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
pip install faster-whisper
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
**Python transcription script (`transcribe.py`):**
|
|
51
|
+
|
|
52
|
+
```python
|
|
53
|
+
import sys
|
|
54
|
+
import json
|
|
55
|
+
from faster_whisper import WhisperModel
|
|
56
|
+
|
|
57
|
+
def transcribe(
    audio_path: str,
    model_size: str = "large-v3-turbo",
    device: str = "cuda",
    compute_type: str = "float16",
):
    """Transcribe an audio file with faster-whisper.

    Args:
        audio_path: Path of the audio file to transcribe.
        model_size: Whisper model name passed to ``WhisperModel``.
        device: Inference device passed to ``WhisperModel`` (default ``"cuda"``).
        compute_type: Numeric precision passed to ``WhisperModel``.

    Returns:
        A JSON-serializable dict with the detected ``language``, its
        ``language_probability``, the audio ``duration``, the per-segment
        ``segments`` list and the space-joined ``full_text``.
    """
    model = WhisperModel(model_size, device=device, compute_type=compute_type)
    segments, info = model.transcribe(audio_path, beam_size=5)

    results = []
    for segment in segments:
        results.append({
            "start": round(segment.start, 2),
            "end": round(segment.end, 2),
            "text": segment.text.strip(),
            # The faster-whisper field is `avg_logprob` (no second underscore).
            # NOTE(review): this is an average log-probability, not a 0-1
            # score; confirm how downstream consumers interpret "confidence".
            "confidence": round(segment.avg_logprob, 4),
        })

    return {
        "language": info.language,
        "language_probability": round(info.language_probability, 2),
        "duration": round(info.duration, 2),
        "segments": results,
        "full_text": " ".join(s["text"] for s in results),
    }


if __name__ == "__main__":
    # Fail with a usage message instead of an IndexError traceback when the
    # caller forgets the audio path.
    if len(sys.argv) < 2:
        sys.exit("usage: python transcribe.py <audio_path>")
    result = transcribe(sys.argv[1])
    print(json.dumps(result))
|
|
81
|
+
```
|
|
82
|
+
|
|
83
|
+
**Node.js caller:**
|
|
84
|
+
|
|
85
|
+
```typescript
|
|
86
|
+
import { execFile } from "node:child_process";
|
|
87
|
+
import { promisify } from "node:util";
|
|
88
|
+
|
|
89
|
+
const execFileAsync = promisify(execFile);
|
|
90
|
+
|
|
91
|
+
/** One timed segment in the JSON printed by `transcribe.py`. */
interface TranscriptionSegment {
  start: number;
  end: number;
  text: string;
  confidence: number;
}

/** Shape of the JSON document that `transcribe.py` writes to stdout. */
interface TranscriptionResult {
  language: string;
  language_probability: number;
  duration: number;
  segments: TranscriptionSegment[];
  full_text: string;
}

/**
 * Runs `transcribe.py` through the `python` executable and parses its stdout.
 *
 * @param audioPath Audio file path forwarded as the script's only argument.
 * @returns The parsed JSON output of the script.
 */
async function transcribeLocal(audioPath: string): Promise<TranscriptionResult> {
  const scriptArgs = ["transcribe.py", audioPath];
  const execOptions = {
    maxBuffer: 50 * 1024 * 1024, // 50MB for long transcriptions
    timeout: 600_000, // 10 min timeout
  };

  const child = await execFileAsync("python", scriptArgs, execOptions);
  return JSON.parse(child.stdout);
}
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
**Alternative: whisper-node (Node.js wrapper around whisper.cpp):**
|
|
114
|
+
|
|
115
|
+
```typescript
|
|
116
|
+
import whisper from "whisper-node";
|
|
117
|
+
|
|
118
|
+
// Transcribe a local WAV file with whisper-node, requesting word-level timestamps.
// NOTE(review): confirm that the installed whisper-node release accepts the
// "large-v3-turbo" model name.
const transcript = await whisper("audio.wav", {
  modelName: "large-v3-turbo",
  whisperOptions: { language: "auto", word_timestamps: true },
});
// Returns array of { start, end, speech }
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
---
|
|
126
|
+
|
|
127
|
+
### 2. Deepgram Nova-3 (REST + WebSocket Streaming)
|
|
128
|
+
|
|
129
|
+
**Install:**
|
|
130
|
+
|
|
131
|
+
```bash
|
|
132
|
+
npm install @deepgram/sdk
|
|
133
|
+
```
|
|
134
|
+
|
|
135
|
+
**Pre-recorded (REST):**
|
|
136
|
+
|
|
137
|
+
```typescript
|
|
138
|
+
import fs from "node:fs";
import { createClient } from "@deepgram/sdk";
|
|
139
|
+
|
|
140
|
+
const deepgram = createClient(process.env.DEEPGRAM_API_KEY!);
|
|
141
|
+
|
|
142
|
+
/**
 * Transcribes a pre-recorded audio file with Deepgram Nova-3.
 *
 * @param filePath Path of the audio file to upload.
 * @returns The first alternative of the first channel: transcript text,
 *   confidence, word list and paragraphs.
 * @throws The SDK error when the request fails, or an `Error` when the
 *   response contains no transcript alternative.
 */
async function transcribeFile(filePath: string) {
  // Read asynchronously so a large file does not block the event loop.
  const audio = await fs.promises.readFile(filePath);

  // The SDK reports failures through `error` rather than by throwing, so it
  // has to be checked before `result` is dereferenced.
  const { result, error } = await deepgram.listen.prerecorded.transcribeFile(
    audio,
    {
      model: "nova-3",
      smart_format: true,
      diarize: true,
      language: "en",
      punctuate: true,
      utterances: true,
    }
  );
  if (error) throw error;

  const transcript = result?.results.channels[0]?.alternatives[0];
  if (!transcript) {
    throw new Error(`Deepgram returned no transcript for ${filePath}`);
  }

  return {
    text: transcript.transcript,
    confidence: transcript.confidence,
    words: transcript.words, // includes speaker labels when diarize=true
    paragraphs: transcript.paragraphs,
  };
}
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
**Real-time WebSocket streaming:**
|
|
166
|
+
|
|
167
|
+
```typescript
|
|
168
|
+
import { createClient, LiveTranscriptionEvents } from "@deepgram/sdk";
|
|
169
|
+
|
|
170
|
+
const deepgram = createClient(process.env.DEEPGRAM_API_KEY!);
|
|
171
|
+
|
|
172
|
+
/**
 * Opens a Deepgram live-transcription connection and relays results to a callback.
 *
 * @param onTranscript Invoked with every non-empty transcript together with
 *   the event's `is_final` flag, and with `("", true)` on each UtteranceEnd event.
 * @returns `send` to push an audio chunk into the connection and `close` to
 *   request that the connection be closed.
 */
function startLiveTranscription(onTranscript: (text: string, isFinal: boolean) => void) {
  const socket = deepgram.listen.live({
    model: "nova-3",
    language: "en",
    smart_format: true,
    interim_results: true,
    utterance_end_ms: 1500,
    vad_events: true,
    encoding: "linear16",
    sample_rate: 16000,
  });

  socket.on(LiveTranscriptionEvents.Open, () => {
    console.log("Deepgram connection opened");
  });

  socket.on(LiveTranscriptionEvents.Transcript, (data) => {
    const text = data.channel.alternatives[0].transcript;
    // Empty transcripts are dropped.
    if (!text) return;
    onTranscript(text, data.is_final);
  });

  socket.on(LiveTranscriptionEvents.UtteranceEnd, () => {
    // Signal end of utterance
    onTranscript("", true);
  });

  socket.on(LiveTranscriptionEvents.Error, (err) => {
    console.error("Deepgram error:", err);
  });

  const send = (audioChunk: Buffer) => socket.send(audioChunk);
  const close = () => socket.requestClose();
  return { send, close };
}
|
|
208
|
+
|
|
209
|
+
// Usage with microphone (e.g., from a WebSocket client):
|
|
210
|
+
// const live = startLiveTranscription((text, isFinal) => {
|
|
211
|
+
// if (isFinal) console.log("Final:", text);
|
|
212
|
+
// else console.log("Interim:", text);
|
|
213
|
+
// });
|
|
214
|
+
// audioStream.on("data", (chunk) => live.send(chunk));
|
|
215
|
+
```
|
|
216
|
+
|
|
217
|
+
---
|
|
218
|
+
|
|
219
|
+
### 3. AssemblyAI (Upload + Poll + Webhooks)
|
|
220
|
+
|
|
221
|
+
**Install:**
|
|
222
|
+
|
|
223
|
+
```bash
|
|
224
|
+
npm install assemblyai
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
**Basic transcription with intelligence features:**
|
|
228
|
+
|
|
229
|
+
```typescript
|
|
230
|
+
import { AssemblyAI } from "assemblyai";
|
|
231
|
+
|
|
232
|
+
const client = new AssemblyAI({ apiKey: process.env.ASSEMBLYAI_API_KEY! });
|
|
233
|
+
|
|
234
|
+
/**
 * Transcribes audio at a URL with AssemblyAI, enabling speaker labels,
 * chapters, sentiment analysis, entity detection, highlights and language
 * detection.
 *
 * @param audioUrl URL of the audio to transcribe.
 * @returns The transcript text and confidence plus the result of each
 *   enabled feature.
 * @throws Error when the returned transcript has status `"error"`.
 */
async function transcribeWithIntelligence(audioUrl: string) {
  const transcript = await client.transcripts.transcribe({
    audio_url: audioUrl,
    speaker_labels: true,
    auto_chapters: true,
    sentiment_analysis: true,
    entity_detection: true,
    auto_highlights: true,
    language_detection: true,
  });

  const failed = transcript.status === "error";
  if (failed) {
    throw new Error(`Transcription failed: ${transcript.error}`);
  }

  const {
    text,
    confidence,
    utterances,
    chapters,
    sentiment_analysis_results,
    entities,
    auto_highlights_result,
  } = transcript;

  return {
    text,
    confidence,
    speakers: utterances, // speaker-labeled segments
    chapters, // auto-generated chapters with summaries
    sentiment: sentiment_analysis_results,
    entities,
    highlights: auto_highlights_result,
  };
}
|
|
259
|
+
```
|
|
260
|
+
|
|
261
|
+
**With webhook (for long files):**
|
|
262
|
+
|
|
263
|
+
```typescript
|
|
264
|
+
/**
 * Submits a transcription job with a webhook URL attached.
 *
 * @param audioUrl URL of the audio to transcribe.
 * @param webhookUrl URL passed to the job as `webhook_url`.
 * @returns The id of the submitted transcript; poll it or wait for the webhook.
 */
async function transcribeAsync(audioUrl: string, webhookUrl: string) {
  const { id } = await client.transcripts.submit({
    audio_url: audioUrl,
    webhook_url: webhookUrl,
    speaker_labels: true,
  });

  return id;
}
|
|
273
|
+
|
|
274
|
+
// Express webhook handler.
// The whole body is wrapped in try/catch: a rejected promise inside an async
// Express handler is otherwise never answered and surfaces as an unhandled
// rejection.
app.post("/webhooks/assemblyai", async (req, res) => {
  try {
    // Tolerate a missing body (e.g. no JSON body parser registered).
    const { transcript_id, status } = req.body ?? {};
    if (status === "completed") {
      const transcript = await client.transcripts.get(transcript_id);
      // Process completed transcript
    }
    res.sendStatus(200);
  } catch (err) {
    console.error("AssemblyAI webhook handling failed:", err);
    // NOTE(review): a non-2xx reply presumably makes the sender retry the
    // delivery; confirm against the provider's webhook documentation.
    res.sendStatus(500);
  }
});
|
|
283
|
+
```
|
|
284
|
+
|
|
285
|
+
**Polling pattern:**
|
|
286
|
+
|
|
287
|
+
```typescript
|
|
288
|
+
/**
 * Polls a transcript until it completes or fails.
 *
 * @param transcriptId Id of the transcript to poll.
 * @param intervalMs Delay between polls in milliseconds (default 3000).
 * @param timeoutMs Maximum total time to keep polling; defaults to no limit,
 *   which matches the previous behavior.
 * @returns The transcript once its status is `"completed"`.
 * @throws Error when the transcript status is `"error"` or the timeout elapses.
 */
async function pollTranscript(
  transcriptId: string,
  intervalMs = 3000,
  timeoutMs = Number.POSITIVE_INFINITY
) {
  const deadline = Date.now() + timeoutMs;

  // Iterate instead of recursing so a long-running job does not build an
  // ever-growing chain of pending promises.
  for (;;) {
    const transcript = await client.transcripts.get(transcriptId);

    if (transcript.status === "completed") return transcript;
    if (transcript.status === "error") {
      throw new Error(transcript.error ?? `Transcript ${transcriptId} failed`);
    }

    if (Date.now() + intervalMs > deadline) {
      throw new Error(`Timed out waiting for transcript ${transcriptId}`);
    }

    // Still processing — wait and retry
    await new Promise((r) => setTimeout(r, intervalMs));
  }
}
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
---
|
|
301
|
+
|
|
302
|
+
### 4. Google Gemini Audio
|
|
303
|
+
|
|
304
|
+
If the project already uses Gemini for other features, this is the most ecosystem-aligned option.
|
|
305
|
+
|
|
306
|
+
**Install:**
|
|
307
|
+
|
|
308
|
+
```bash
|
|
309
|
+
npm install @google/generative-ai
|
|
310
|
+
```
|
|
311
|
+
|
|
312
|
+
**Transcribe with Gemini:**
|
|
313
|
+
|
|
314
|
+
```typescript
|
|
315
|
+
import { GoogleGenerativeAI } from "@google/generative-ai";
|
|
316
|
+
import fs from "node:fs";
|
|
317
|
+
|
|
318
|
+
const genAI = new GoogleGenerativeAI(process.env.GEMINI_API_KEY!);
|
|
319
|
+
|
|
320
|
+
/**
 * Transcribes a local audio file by sending it inline (base64) to Gemini.
 *
 * @param audioPath Path of the audio file; its extension selects the MIME
 *   type, falling back to `audio/mpeg` for unknown extensions.
 * @returns The parsed JSON object produced by the model (text, segments,
 *   language, summary as requested in the prompt).
 * @throws Error when the model output is not valid JSON.
 */
async function transcribeWithGemini(audioPath: string) {
  // Request a JSON response body so the reply can be parsed directly instead
  // of relying only on the prompt to suppress markdown fences.
  const model = genAI.getGenerativeModel({
    model: "gemini-2.0-flash",
    generationConfig: { responseMimeType: "application/json" },
  });

  // Read asynchronously so a large file does not block the event loop.
  const audioData = await fs.promises.readFile(audioPath);
  const base64Audio = audioData.toString("base64");

  // Determine MIME type
  const ext = audioPath.split(".").pop()?.toLowerCase() ?? "";
  const mimeMap: Record<string, string> = {
    mp3: "audio/mpeg",
    wav: "audio/wav",
    m4a: "audio/mp4",
    ogg: "audio/ogg",
    flac: "audio/flac",
    webm: "audio/webm",
  };
  const mimeType = mimeMap[ext] ?? "audio/mpeg";

  const result = await model.generateContent([
    {
      inlineData: {
        mimeType,
        data: base64Audio,
      },
    },
    {
      text: `Transcribe this audio accurately. Return a JSON object with:
- "text": the full transcription
- "segments": array of { "timestamp": "MM:SS", "text": "segment text" }
- "language": detected language code
- "summary": one-paragraph summary

Return ONLY valid JSON, no markdown fences.`,
    },
  ]);

  const raw = result.response.text();
  try {
    return JSON.parse(raw);
  } catch {
    // Surface the start of the offending payload rather than a bare SyntaxError.
    throw new Error(`Gemini returned non-JSON output: ${raw.slice(0, 200)}`);
  }
}
|
|
358
|
+
```
|
|
359
|
+
|
|
360
|
+
**Gemini with speaker diarization prompt:**
|
|
361
|
+
|
|
362
|
+
```typescript
|
|
363
|
+
/**
 * Transcribes a local audio file with Gemini and asks for speaker-labeled
 * utterances.
 *
 * @param audioPath Path of the audio file; its extension selects the MIME
 *   type, falling back to `audio/wav` (the previous fixed value).
 * @param speakerCount Optional known number of speakers; when omitted the
 *   prompt asks the model to detect it.
 * @returns The parsed JSON object produced by the model.
 * @throws Error when the model output is not valid JSON.
 */
async function transcribeWithSpeakers(audioPath: string, speakerCount?: number) {
  // Request a JSON response body so the reply can be parsed directly.
  const model = genAI.getGenerativeModel({
    model: "gemini-2.0-flash",
    generationConfig: { responseMimeType: "application/json" },
  });

  const audioData = await fs.promises.readFile(audioPath);

  // Derive the MIME type from the extension instead of labelling every file
  // as WAV.
  const ext = audioPath.split(".").pop()?.toLowerCase() ?? "";
  const mimeMap: Record<string, string> = {
    mp3: "audio/mpeg",
    wav: "audio/wav",
    m4a: "audio/mp4",
    ogg: "audio/ogg",
    flac: "audio/flac",
    webm: "audio/webm",
  };
  const mimeType = mimeMap[ext] ?? "audio/wav";

  const result = await model.generateContent([
    {
      inlineData: {
        mimeType,
        data: audioData.toString("base64"),
      },
    },
    {
      text: `Transcribe this audio with speaker identification.
${speakerCount ? `There are ${speakerCount} speakers.` : "Detect the number of speakers."}

Return JSON:
{
"speakers_detected": number,
"utterances": [
{ "speaker": "Speaker 1", "start": "0:00", "end": "0:05", "text": "..." }
],
"full_text": "Speaker 1: ... Speaker 2: ..."
}

Return ONLY valid JSON.`,
    },
  ]);

  const raw = result.response.text();
  try {
    return JSON.parse(raw);
  } catch {
    // Surface the start of the offending payload rather than a bare SyntaxError.
    throw new Error(`Gemini returned non-JSON output: ${raw.slice(0, 200)}`);
  }
}
|
|
394
|
+
```
|
|
395
|
+
|
|
396
|
+
> **Note:** Gemini audio has a file size limit (~20MB inline). For larger files, use the File API to upload first, then reference the file URI.
|
|
397
|
+
|
|
398
|
+
---
|
|
399
|
+
|
|
400
|
+
## Pre-processing Best Practices
|
|
401
|
+
|
|
402
|
+
### Convert to 16kHz Mono WAV (FFmpeg)
|
|
403
|
+
|
|
404
|
+
All STT engines perform best with 16kHz mono WAV input. This normalizes any source format:
|
|
405
|
+
|
|
406
|
+
```bash
|
|
407
|
+
# Single file: resample to 16 kHz (-ar 16000), downmix to mono (-ac 1),
# encode as 16-bit PCM (pcm_s16le) WAV
ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav

# Batch convert directory: each .mp3 in the current directory is written to a
# .wav file with the same base name
for f in *.mp3; do
  ffmpeg -i "$f" -ar 16000 -ac 1 -c:a pcm_s16le "${f%.mp3}.wav"
done
|
|
414
|
+
```
|
|
415
|
+
|
|
416
|
+
**Node.js wrapper using fluent-ffmpeg:**
|
|
417
|
+
|
|
418
|
+
```typescript
|
|
419
|
+
import ffmpeg from "fluent-ffmpeg";
|
|
420
|
+
|
|
421
|
+
/**
 * Converts an audio file to 16 kHz mono 16-bit PCM WAV using fluent-ffmpeg.
 *
 * @param inputPath Source audio file.
 * @param outputPath Destination file passed to `save`.
 * @returns A promise settled by the command's "end" / "error" events.
 */
function preprocessAudio(inputPath: string, outputPath: string): Promise<void> {
  return new Promise((resolve, reject) => {
    const command = ffmpeg(inputPath);

    command
      .audioFrequency(16000)
      .audioChannels(1)
      .audioCodec("pcm_s16le")
      .format("wav");

    command.on("end", resolve);
    command.on("error", reject);
    command.save(outputPath);
  });
}
|
|
433
|
+
```
|
|
434
|
+
|
|
435
|
+
### Split Long Files into 10-Minute Chunks
|
|
436
|
+
|
|
437
|
+
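API providers have file size and duration limits, so split long recordings before uploading. Fixed-interval splitting is the simplest approach; detecting silence boundaries (shown further below) avoids cutting words in half: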
|
|
438
|
+
|
|
439
|
+
```bash
|
|
440
|
+
# Split into consecutive 10-minute (600 s) chunks named chunk_000.wav,
# chunk_001.wav, ... (this command does not add any overlap between chunks)
ffmpeg -i long_audio.wav -f segment -segment_time 600 \
  -c:a pcm_s16le -ar 16000 -ac 1 chunk_%03d.wav
|
|
443
|
+
```
|
|
444
|
+
|
|
445
|
+
**Detecting silence boundaries for smart splitting:**
|
|
446
|
+
|
|
447
|
+
```typescript
|
|
448
|
+
import ffmpeg from "fluent-ffmpeg";
|
|
449
|
+
|
|
450
|
+
/**
 * Runs FFmpeg's `silencedetect` filter over a file and collects the
 * `silence_end` timestamps it reports.
 *
 * This function only detects candidate split points; it does not write any
 * chunk files.
 *
 * @param inputPath Audio file to analyze.
 * @param outputDir Currently unused; kept so existing callers keep working.
 * @returns The `silence_end` values exactly as printed by FFmpeg (numeric
 *   strings), in the order they were reported.
 */
async function splitAtSilence(inputPath: string, outputDir: string): Promise<string[]> {
  return new Promise((resolve, reject) => {
    const silenceEnds: string[] = [];

    ffmpeg(inputPath)
      .audioFilters("silencedetect=noise=-30dB:d=0.5")
      .format("null")
      // "-" with the null muxer discards the output on every platform,
      // whereas "/dev/null" does not exist on Windows.
      .output("-")
      .on("stderr", (line: string) => {
        // Parse silence timestamps from FFmpeg stderr
        const match = line.match(/silence_end: ([\d.]+)/);
        if (match) silenceEnds.push(match[1]);
      })
      .on("end", () => resolve(silenceEnds))
      .on("error", reject)
      .run();
  });
}
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
### Detect Language Before Choosing Model
|
|
471
|
+
|
|
472
|
+
```typescript
|
|
473
|
+
import { AssemblyAI } from "assemblyai";
|
|
474
|
+
|
|
475
|
+
// AssemblyAI auto-detects language
// NOTE(review): this snippet assumes `client` and `url` are defined by the
// surrounding code — neither is declared here.
const transcript = await client.transcripts.transcribe({
  audio_url: url,
  language_detection: true,
});
console.log(transcript.language_code); // "en", "es", "fr", etc.

// Whisper auto-detects on first 30 seconds
// Pass language="auto" or omit the language parameter

// Deepgram: set detect_language=true
// NOTE(review): assumes `deepgram` and `buffer` (the audio bytes) are defined
// by the surrounding code.
const { result } = await deepgram.listen.prerecorded.transcribeFile(buffer, {
  model: "nova-3",
  detect_language: true,
});
|
|
490
|
+
```
|
|
491
|
+
|
|
492
|
+
---
|
|
493
|
+
|
|
494
|
+
## Post-processing
|
|
495
|
+
|
|
496
|
+
### Punctuation Restoration
|
|
497
|
+
|
|
498
|
+
Most cloud APIs handle this natively (`smart_format: true` for Deepgram, default for AssemblyAI). For local Whisper output that needs cleanup:
|
|
499
|
+
|
|
500
|
+
```typescript
|
|
501
|
+
/**
 * Asks Gemini to add punctuation, capitalization and paragraph breaks to a
 * raw transcript without changing its words.
 *
 * @param rawText Unpunctuated transcript text.
 * @returns The text of the model's response.
 */
async function restorePunctuation(rawText: string): Promise<string> {
  const prompt = `Fix punctuation, capitalization, and paragraph breaks in this transcript. Do NOT change any words, only add punctuation and formatting:\n\n${rawText}`;

  const gemini = genAI.getGenerativeModel({ model: "gemini-2.0-flash" });
  const { response } = await gemini.generateContent(prompt);
  return response.text();
}
|
|
510
|
+
```
|
|
511
|
+
|
|
512
|
+
### Speaker Diarization
|
|
513
|
+
|
|
514
|
+
For providers without built-in diarization, use pyannote.audio (best open-source diarization):
|
|
515
|
+
|
|
516
|
+
```python
|
|
517
|
+
from pyannote.audio import Pipeline
|
|
518
|
+
|
|
519
|
+
import os

# Load the pretrained diarization pipeline. The Hugging Face token is read
# from the HF_TOKEN environment variable so it never has to be written into
# source code; a missing variable raises KeyError immediately.
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token=os.environ["HF_TOKEN"],
)

diarization = pipeline("audio.wav")

# Print one line per speaker turn: "[start - end] speaker-label".
for turn, _, speaker in diarization.itertracks(yield_label=True):
    print(f"[{turn.start:.1f} - {turn.end:.1f}] {speaker}")
|
|
528
|
+
```
|
|
529
|
+
|
|
530
|
+
### Timestamp Alignment for Subtitles
|
|
531
|
+
|
|
532
|
+
**Generate SRT format:**
|
|
533
|
+
|
|
534
|
+
```typescript
|
|
535
|
+
/** A timed piece of transcript text; `start` and `end` are in seconds. */
interface Segment {
  start: number;
  end: number;
  text: string;
}

/**
 * Renders segments as SubRip (SRT) text.
 *
 * Each cue is numbered from 1, timestamps use the `HH:MM:SS,mmm` form, and
 * cues are separated by a blank line.
 *
 * @param segments Segments to render, in display order.
 * @returns The SRT document, or an empty string for an empty list.
 */
function toSRT(segments: Segment[]): string {
  const pad = (value: number, width: number) => String(value).padStart(width, "0");

  const formatTime = (seconds: number) => {
    // Round once on the total millisecond count. Rounding the fractional
    // part on its own can produce 1000 ms (e.g. 1.9996 s -> "01,1000").
    const totalMs = Math.max(0, Math.round(seconds * 1000));
    const h = Math.floor(totalMs / 3_600_000);
    const m = Math.floor((totalMs % 3_600_000) / 60_000);
    const s = Math.floor((totalMs % 60_000) / 1000);
    const ms = totalMs % 1000;
    return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)},${pad(ms, 3)}`;
  };

  return segments
    .map((seg, i) => `${i + 1}\n${formatTime(seg.start)} --> ${formatTime(seg.end)}\n${seg.text}\n`)
    .join("\n");
}
|
|
555
|
+
|
|
556
|
+
// Output:
|
|
557
|
+
// 1
|
|
558
|
+
// 00:00:00,000 --> 00:00:04,520
|
|
559
|
+
// Hello and welcome to today's session.
|
|
560
|
+
```
|
|
561
|
+
|
|
562
|
+
**Generate VTT format:**
|
|
563
|
+
|
|
564
|
+
```typescript
|
|
565
|
+
/**
 * Renders segments as a WebVTT document.
 *
 * The output starts with the `WEBVTT` header, timestamps use the
 * `HH:MM:SS.mmm` form, and cues are separated by a blank line.
 *
 * @param segments Segments to render, in display order; `start` and `end`
 *   are interpreted as seconds.
 * @returns The WebVTT document text.
 */
function toVTT(segments: Segment[]): string {
  const pad = (value: number, width: number) => String(value).padStart(width, "0");

  const formatTime = (seconds: number) => {
    // Round once on the total millisecond count. Rounding the fractional
    // part on its own can produce 1000 ms (e.g. 1.9996 s -> "01.1000").
    const totalMs = Math.max(0, Math.round(seconds * 1000));
    const h = Math.floor(totalMs / 3_600_000);
    const m = Math.floor((totalMs % 3_600_000) / 60_000);
    const s = Math.floor((totalMs % 60_000) / 1000);
    const ms = totalMs % 1000;
    return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)}.${pad(ms, 3)}`;
  };

  const cues = segments
    .map((seg) => `${formatTime(seg.start)} --> ${formatTime(seg.end)}\n${seg.text}`)
    .join("\n\n");

  return `WEBVTT\n\n${cues}\n`;
}
|
|
580
|
+
```
|
|
581
|
+
|
|
582
|
+
### Confidence Scoring
|
|
583
|
+
|
|
584
|
+
```typescript
|
|
585
|
+
/** Summary produced by `assessTranscriptQuality`. */
interface QualityReport {
  overall_confidence: number;
  low_confidence_segments: Array<{ start: number; end: number; text: string; confidence: number }>;
  recommendation: string;
}

/**
 * Averages per-segment confidence and flags segments scoring below 0.85.
 *
 * @param segments Segments carrying a `confidence` value; the thresholds
 *   used here (0.85 / 0.95) treat it as a 0-1 score.
 * @returns The mean confidence rounded to two decimals, the flagged
 *   segments, and a recommendation string. An empty list yields an overall
 *   confidence of 0 rather than NaN.
 */
function assessTranscriptQuality(
  // Parenthesized on purpose: `Segment & { confidence: number }[]` would
  // intersect Segment with an array type instead of describing an array of
  // segments.
  segments: Array<Segment & { confidence: number }>
): QualityReport {
  const total = segments.reduce((sum, s) => sum + s.confidence, 0);
  // Guard the division so an empty transcript does not produce NaN.
  const avgConfidence = segments.length > 0 ? total / segments.length : 0;
  const lowConfidence = segments.filter((s) => s.confidence < 0.85);

  return {
    overall_confidence: Math.round(avgConfidence * 100) / 100,
    low_confidence_segments: lowConfidence,
    recommendation:
      avgConfidence > 0.95
        ? "High quality — safe for automated processing"
        : avgConfidence > 0.85
          ? "Good quality — spot-check flagged segments"
          : "Low quality — manual review recommended. Consider re-recording or using a different model.",
  };
}
|
|
606
|
+
```
|
|
607
|
+
|
|
608
|
+
---
|
|
609
|
+
|
|
610
|
+
## Cost Calculator
|
|
611
|
+
|
|
612
|
+
### Formula
|
|
613
|
+
|
|
614
|
+
```
|
|
615
|
+
Monthly Cost = (hours_of_audio * 60) * price_per_minute
|
|
616
|
+
```
|
|
617
|
+
|
|
618
|
+
### Quick Reference Table
|
|
619
|
+
|
|
620
|
+
| Provider | 10 hrs/mo | 100 hrs/mo | 1,000 hrs/mo |
|
|
621
|
+
|----------|-----------|------------|--------------|
|
|
622
|
+
| Whisper (local) | $0 | $0 | $0 (GPU: ~$0.10/hr on cloud) |
|
|
623
|
+
| Deepgram Nova-3 | $2.58 | $25.80 | $258.00 |
|
|
624
|
+
| AssemblyAI | $1.50 | $15.00 | $150.00 |
|
|
625
|
+
| Gemini | ~$0.50* | ~$5.00* | ~$50.00* |
|
|
626
|
+
| OpenAI gpt-4o-transcribe | $3.60 | $36.00 | $360.00 |
|
|
627
|
+
|
|
628
|
+
*Gemini pricing is token-based and varies by audio length/complexity.
|
|
629
|
+
|
|
630
|
+
### Cost Estimation Function
|
|
631
|
+
|
|
632
|
+
```typescript
|
|
633
|
+
/** Cost projection for one provider at a given monthly audio volume. */
interface CostEstimate {
  provider: string;
  monthly_hours: number;
  monthly_cost: number;
  annual_cost: number;
  cost_per_hour: number;
}

/**
 * Projects monthly, annual and per-hour cost for each listed provider.
 *
 * @param monthlyHours Hours of audio transcribed per month.
 * @returns One estimate per provider, in the order of the price table, with
 *   every amount rounded to two decimals.
 */
function estimateCost(monthlyHours: number): CostEstimate[] {
  const toCents = (amount: number) => Math.round(amount * 100) / 100;

  // [provider name, price per minute of audio]
  const priceTable: Array<[string, number]> = [
    ["Whisper (local)", 0],
    ["Deepgram Nova-3", 0.0043],
    ["AssemblyAI", 0.0025],
    ["Google Gemini", 0.0008],
    ["OpenAI gpt-4o-transcribe", 0.006],
  ];

  const estimates: CostEstimate[] = [];
  for (const [provider, pricePerMin] of priceTable) {
    const monthly = monthlyHours * 60 * pricePerMin;
    estimates.push({
      provider,
      monthly_hours: monthlyHours,
      monthly_cost: toCents(monthly),
      annual_cost: toCents(monthly * 12),
      cost_per_hour: toCents(pricePerMin * 60),
    });
  }
  return estimates;
}
|
|
661
|
+
```
|
|
662
|
+
|
|
663
|
+
---
|
|
664
|
+
|
|
665
|
+
## Common Pitfalls
|
|
666
|
+
|
|
667
|
+
1. **Sending stereo audio to APIs** — doubles file size, no accuracy gain. Always convert to mono first.
|
|
668
|
+
2. **Not handling API rate limits** — Deepgram allows 100 concurrent streams; AssemblyAI queues automatically. Build retry logic with exponential backoff.
|
|
669
|
+
3. **Ignoring audio quality** — background noise kills accuracy. Apply noise reduction before transcription:
|
|
670
|
+
```bash
|
|
671
|
+
# Apply noise reduction with the afftdn audio filter before transcription
ffmpeg -i noisy.wav -af "afftdn=nf=-25" clean.wav
|
|
672
|
+
```
|
|
673
|
+
4. **Choosing Whisper local without a GPU** — CPU inference is 20-50x slower. On CPU, use `tiny` or `base` models only.
|
|
674
|
+
5. **Not chunking long files** — most APIs have 2-hour or file-size limits. Split proactively.
|
|
675
|
+
|
|
676
|
+
---
|
|
677
|
+
|