@framers/agentos 0.1.74 → 0.1.76
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +139 -34
- package/dist/core/agency/AgentCommunicationBus.d.ts +1 -0
- package/dist/core/agency/AgentCommunicationBus.d.ts.map +1 -1
- package/dist/core/agency/AgentCommunicationBus.js +62 -8
- package/dist/core/agency/AgentCommunicationBus.js.map +1 -1
- package/dist/core/agency/IAgentCommunicationBus.d.ts +1 -1
- package/dist/core/agency/IAgentCommunicationBus.d.ts.map +1 -1
- package/dist/orchestration/builders/index.d.ts +1 -1
- package/dist/orchestration/builders/index.d.ts.map +1 -1
- package/dist/orchestration/builders/index.js +1 -1
- package/dist/orchestration/builders/index.js.map +1 -1
- package/dist/orchestration/builders/nodes.d.ts +15 -0
- package/dist/orchestration/builders/nodes.d.ts.map +1 -1
- package/dist/orchestration/builders/nodes.js +33 -0
- package/dist/orchestration/builders/nodes.js.map +1 -1
- package/dist/orchestration/runtime/LoopController.d.ts +10 -10
- package/dist/orchestration/runtime/LoopController.d.ts.map +1 -1
- package/dist/orchestration/runtime/LoopController.js +1 -1
- package/dist/orchestration/runtime/LoopController.js.map +1 -1
- package/dist/orchestration/runtime/index.d.ts +1 -1
- package/dist/orchestration/runtime/index.d.ts.map +1 -1
- package/dist/orchestration/runtime/index.js.map +1 -1
- package/dist/speech/FallbackProxy.d.ts +104 -0
- package/dist/speech/FallbackProxy.d.ts.map +1 -0
- package/dist/speech/FallbackProxy.js +151 -0
- package/dist/speech/FallbackProxy.js.map +1 -0
- package/dist/speech/SpeechProviderResolver.d.ts +103 -0
- package/dist/speech/SpeechProviderResolver.d.ts.map +1 -0
- package/dist/speech/SpeechProviderResolver.js +256 -0
- package/dist/speech/SpeechProviderResolver.js.map +1 -0
- package/dist/speech/SpeechRuntime.d.ts +23 -1
- package/dist/speech/SpeechRuntime.d.ts.map +1 -1
- package/dist/speech/SpeechRuntime.js +82 -8
- package/dist/speech/SpeechRuntime.js.map +1 -1
- package/dist/speech/index.d.ts +6 -0
- package/dist/speech/index.d.ts.map +1 -1
- package/dist/speech/index.js +6 -0
- package/dist/speech/index.js.map +1 -1
- package/dist/speech/providerCatalog.d.ts.map +1 -1
- package/dist/speech/providerCatalog.js +15 -1
- package/dist/speech/providerCatalog.js.map +1 -1
- package/dist/speech/providers/AssemblyAISTTProvider.d.ts +49 -0
- package/dist/speech/providers/AssemblyAISTTProvider.d.ts.map +1 -0
- package/dist/speech/providers/AssemblyAISTTProvider.js +151 -0
- package/dist/speech/providers/AssemblyAISTTProvider.js.map +1 -0
- package/dist/speech/providers/AzureSpeechSTTProvider.d.ts +48 -0
- package/dist/speech/providers/AzureSpeechSTTProvider.d.ts.map +1 -0
- package/dist/speech/providers/AzureSpeechSTTProvider.js +90 -0
- package/dist/speech/providers/AzureSpeechSTTProvider.js.map +1 -0
- package/dist/speech/providers/AzureSpeechTTSProvider.d.ts +60 -0
- package/dist/speech/providers/AzureSpeechTTSProvider.d.ts.map +1 -0
- package/dist/speech/providers/AzureSpeechTTSProvider.js +127 -0
- package/dist/speech/providers/AzureSpeechTTSProvider.js.map +1 -0
- package/dist/speech/providers/DeepgramBatchSTTProvider.d.ts +55 -0
- package/dist/speech/providers/DeepgramBatchSTTProvider.d.ts.map +1 -0
- package/dist/speech/providers/DeepgramBatchSTTProvider.js +102 -0
- package/dist/speech/providers/DeepgramBatchSTTProvider.js.map +1 -0
- package/dist/speech/types.d.ts +35 -0
- package/dist/speech/types.d.ts.map +1 -1
- package/dist/voice/CallManager.d.ts +1 -1
- package/dist/voice/CallManager.d.ts.map +1 -1
- package/dist/voice/CallManager.js +9 -0
- package/dist/voice/CallManager.js.map +1 -1
- package/dist/voice/MediaStreamParser.d.ts +83 -0
- package/dist/voice/MediaStreamParser.d.ts.map +1 -0
- package/dist/voice/MediaStreamParser.js +2 -0
- package/dist/voice/MediaStreamParser.js.map +1 -0
- package/dist/voice/TelephonyStreamTransport.d.ts +112 -0
- package/dist/voice/TelephonyStreamTransport.d.ts.map +1 -0
- package/dist/voice/TelephonyStreamTransport.js +208 -0
- package/dist/voice/TelephonyStreamTransport.js.map +1 -0
- package/dist/voice/index.d.ts +10 -0
- package/dist/voice/index.d.ts.map +1 -1
- package/dist/voice/index.js +11 -0
- package/dist/voice/index.js.map +1 -1
- package/dist/voice/parsers/PlivoMediaStreamParser.d.ts +43 -0
- package/dist/voice/parsers/PlivoMediaStreamParser.d.ts.map +1 -0
- package/dist/voice/parsers/PlivoMediaStreamParser.js +92 -0
- package/dist/voice/parsers/PlivoMediaStreamParser.js.map +1 -0
- package/dist/voice/parsers/TelnyxMediaStreamParser.d.ts +51 -0
- package/dist/voice/parsers/TelnyxMediaStreamParser.d.ts.map +1 -0
- package/dist/voice/parsers/TelnyxMediaStreamParser.js +103 -0
- package/dist/voice/parsers/TelnyxMediaStreamParser.js.map +1 -0
- package/dist/voice/parsers/TwilioMediaStreamParser.d.ts +50 -0
- package/dist/voice/parsers/TwilioMediaStreamParser.d.ts.map +1 -0
- package/dist/voice/parsers/TwilioMediaStreamParser.js +144 -0
- package/dist/voice/parsers/TwilioMediaStreamParser.js.map +1 -0
- package/dist/voice/providers/plivo.d.ts +77 -0
- package/dist/voice/providers/plivo.d.ts.map +1 -0
- package/dist/voice/providers/plivo.js +180 -0
- package/dist/voice/providers/plivo.js.map +1 -0
- package/dist/voice/providers/telnyx.d.ts +93 -0
- package/dist/voice/providers/telnyx.d.ts.map +1 -0
- package/dist/voice/providers/telnyx.js +193 -0
- package/dist/voice/providers/telnyx.js.map +1 -0
- package/dist/voice/providers/twilio.d.ts +79 -0
- package/dist/voice/providers/twilio.d.ts.map +1 -0
- package/dist/voice/providers/twilio.js +191 -0
- package/dist/voice/providers/twilio.js.map +1 -0
- package/dist/voice/twiml.d.ts +69 -0
- package/dist/voice/twiml.d.ts.map +1 -0
- package/dist/voice/twiml.js +92 -0
- package/dist/voice/twiml.js.map +1 -0
- package/dist/voice/types.d.ts +9 -1
- package/dist/voice/types.d.ts.map +1 -1
- package/dist/voice-pipeline/AcousticEndpointDetector.d.ts +90 -0
- package/dist/voice-pipeline/AcousticEndpointDetector.d.ts.map +1 -0
- package/dist/voice-pipeline/AcousticEndpointDetector.js +123 -0
- package/dist/voice-pipeline/AcousticEndpointDetector.js.map +1 -0
- package/dist/voice-pipeline/HardCutBargeinHandler.d.ts +67 -0
- package/dist/voice-pipeline/HardCutBargeinHandler.d.ts.map +1 -0
- package/dist/voice-pipeline/HardCutBargeinHandler.js +55 -0
- package/dist/voice-pipeline/HardCutBargeinHandler.js.map +1 -0
- package/dist/voice-pipeline/HeuristicEndpointDetector.d.ts +128 -0
- package/dist/voice-pipeline/HeuristicEndpointDetector.d.ts.map +1 -0
- package/dist/voice-pipeline/HeuristicEndpointDetector.js +240 -0
- package/dist/voice-pipeline/HeuristicEndpointDetector.js.map +1 -0
- package/dist/voice-pipeline/SoftFadeBargeinHandler.d.ts +96 -0
- package/dist/voice-pipeline/SoftFadeBargeinHandler.d.ts.map +1 -0
- package/dist/voice-pipeline/SoftFadeBargeinHandler.js +69 -0
- package/dist/voice-pipeline/SoftFadeBargeinHandler.js.map +1 -0
- package/dist/voice-pipeline/VoicePipelineOrchestrator.d.ts +122 -0
- package/dist/voice-pipeline/VoicePipelineOrchestrator.d.ts.map +1 -0
- package/dist/voice-pipeline/VoicePipelineOrchestrator.js +317 -0
- package/dist/voice-pipeline/VoicePipelineOrchestrator.js.map +1 -0
- package/dist/voice-pipeline/WebSocketStreamTransport.d.ts +148 -0
- package/dist/voice-pipeline/WebSocketStreamTransport.d.ts.map +1 -0
- package/dist/voice-pipeline/WebSocketStreamTransport.js +207 -0
- package/dist/voice-pipeline/WebSocketStreamTransport.js.map +1 -0
- package/dist/voice-pipeline/index.d.ts +13 -0
- package/dist/voice-pipeline/index.d.ts.map +1 -0
- package/dist/voice-pipeline/index.js +13 -0
- package/dist/voice-pipeline/index.js.map +1 -0
- package/dist/voice-pipeline/types.d.ts +905 -0
- package/dist/voice-pipeline/types.d.ts.map +1 -0
- package/dist/voice-pipeline/types.js +23 -0
- package/dist/voice-pipeline/types.js.map +1 -0
- package/package.json +6 -1
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"providerCatalog.js","sourceRoot":"","sources":["../../src/speech/providerCatalog.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,uBAAuB,GAA0C;IAC5E;QACE,EAAE,EAAE,QAAQ;QACZ,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,QAAQ;QACf,OAAO,EAAE,CAAC,oBAAoB,EAAE,mBAAmB,CAAC;QACpD,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,uDAAuD;QACpE,aAAa,EAAE,cAAc;QAC7B,QAAQ,EAAE,CAAC,WAAW,EAAE,UAAU,EAAE,eAAe,CAAC;KACrD;IACD;QACE,EAAE,EAAE,QAAQ;QACZ,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,QAAQ;QACf,OAAO,EAAE,CAAC,gBAAgB,EAAE,sBAAsB,CAAC;QACnD,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,iDAAiD;QAC9D,aAAa,EAAE,cAAc;QAC7B,QAAQ,EAAE,CAAC,WAAW,EAAE,KAAK,EAAE,eAAe,CAAC;KAChD;IACD;QACE,EAAE,EAAE,OAAO;QACX,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,OAAO;QACd,OAAO,EAAE,CAAC,eAAe,EAAE,kBAAkB,CAAC;QAC9C,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,iDAAiD;QAC9D,aAAa,EAAE,aAAa;QAC5B,QAAQ,EAAE,CAAC,WAAW,EAAE,KAAK,CAAC;KAC/B;IACD;QACE,EAAE,EAAE,gBAAgB;QACpB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,gBAAgB;QACvB,OAAO,EAAE,CAAC,gBAAgB,CAAC;QAC3B,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,qEAAqE;QAClF,YAAY,EAAE,WAAW;QACzB,SAAS,EAAE,KAAK;QAChB,QAAQ,EAAE,CAAC,OAAO,EAAE,YAAY,EAAE,eAAe,CAAC;KACnD;IACD;QACE,EAAE,EAAE,UAAU;QACd,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,UAAU;QACjB,OAAO,EAAE,CAAC,kBAAkB,CAAC;QAC7B,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,wDAAwD;QACrE,SAAS,EAAE,IAAI;QACf,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC;KACjC;IACD;QACE,EAAE,EAAE,YAAY;QAChB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,YAAY;QACnB,OAAO,EAAE,CAAC,oBAAoB,CAAC;QAC/B,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,uDAAuD;QACpE,SAAS,EAAE,IAAI;QACf,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,aAAa,CAAC;KAChD;IACD;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,kBAAkB;QACzB,OAAO,EAAE,CAAC,wBAAwB,CAAC;QACnC,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,kCAAkC;QAC/C,SAAS,EAAE,IAAI;QACf,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC;KACjC;IACD;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,kBAAkB;QACzB,OAAO,EAAE,CAAC,kBAAkB,EAAE,qBAAqB,CAAC;QACpD,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,4DAA4D;QACzE,SAAS,EAAE,
|
|
1
|
+
{"version":3,"file":"providerCatalog.js","sourceRoot":"","sources":["../../src/speech/providerCatalog.ts"],"names":[],"mappings":"AAEA,MAAM,CAAC,MAAM,uBAAuB,GAA0C;IAC5E;QACE,EAAE,EAAE,QAAQ;QACZ,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,QAAQ;QACf,OAAO,EAAE,CAAC,oBAAoB,EAAE,mBAAmB,CAAC;QACpD,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,uDAAuD;QACpE,aAAa,EAAE,cAAc;QAC7B,QAAQ,EAAE,CAAC,WAAW,EAAE,UAAU,EAAE,eAAe,CAAC;KACrD;IACD;QACE,EAAE,EAAE,QAAQ;QACZ,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,QAAQ;QACf,OAAO,EAAE,CAAC,gBAAgB,EAAE,sBAAsB,CAAC;QACnD,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,iDAAiD;QAC9D,aAAa,EAAE,cAAc;QAC7B,QAAQ,EAAE,CAAC,WAAW,EAAE,KAAK,EAAE,eAAe,CAAC;KAChD;IACD;QACE,EAAE,EAAE,OAAO;QACX,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,OAAO;QACd,OAAO,EAAE,CAAC,eAAe,EAAE,kBAAkB,CAAC;QAC9C,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,iDAAiD;QAC9D,aAAa,EAAE,aAAa;QAC5B,QAAQ,EAAE,CAAC,WAAW,EAAE,KAAK,CAAC;KAC/B;IACD;QACE,EAAE,EAAE,gBAAgB;QACpB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,gBAAgB;QACvB,OAAO,EAAE,CAAC,gBAAgB,CAAC;QAC3B,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,qEAAqE;QAClF,YAAY,EAAE,WAAW;QACzB,SAAS,EAAE,KAAK;QAChB,QAAQ,EAAE,CAAC,OAAO,EAAE,YAAY,EAAE,eAAe,CAAC;KACnD;IACD;QACE,EAAE,EAAE,UAAU;QACd,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,UAAU;QACjB,OAAO,EAAE,CAAC,kBAAkB,CAAC;QAC7B,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,wDAAwD;QACrE,SAAS,EAAE,IAAI;QACf,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC;KACjC;IACD;QACE,EAAE,EAAE,gBAAgB;QACpB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,gBAAgB;QACvB,OAAO,EAAE,CAAC,kBAAkB,CAAC;QAC7B,KAAK,EAAE,KAAK;QACZ,SAAS,EAAE,KAAK;QAChB,WAAW,EAAE,6CAA6C;QAC1D,QAAQ,EAAE,CAAC,OAAO,EAAE,aAAa,EAAE,YAAY,CAAC;KACjD;IACD;QACE,EAAE,EAAE,YAAY;QAChB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,YAAY;QACnB,OAAO,EAAE,CAAC,oBAAoB,CAAC;QAC/B,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,uDAAuD;QACpE,SAAS,EAAE,IAAI;QACf,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,EAAE,aAAa,CAAC;KAChD;IACD;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,kBAAkB;QACzB,OAAO,EAAE,CAAC,wBAAwB,CAAC;QACnC,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,kCAAkC;QAC/C,SAAS,EAAE,IAAI;QACf,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC;KACjC;IACD;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,KAA
K;QACX,KAAK,EAAE,kBAAkB;QACzB,OAAO,EAAE,CAAC,kBAAkB,EAAE,qBAAqB,CAAC;QACpD,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,4DAA4D;QACzE,SAAS,EAAE,KAAK;QAChB,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC;KACjC;IACD;QACE,EAAE,EAAE,eAAe;QACnB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,aAAa;QACpB,OAAO,EAAE,EAAE;QACX,KAAK,EAAE,IAAI;QACX,WAAW,EAAE,0CAA0C;QACvD,QAAQ,EAAE,CAAC,OAAO,EAAE,SAAS,CAAC;KAC/B;IACD;QACE,EAAE,EAAE,MAAM;QACV,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,MAAM;QACb,OAAO,EAAE,EAAE;QACX,KAAK,EAAE,IAAI;QACX,WAAW,EAAE,8CAA8C;QAC3D,SAAS,EAAE,IAAI;QACf,QAAQ,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,WAAW,CAAC;KAC5C;IACD;QACE,EAAE,EAAE,aAAa;QACjB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,aAAa;QACpB,OAAO,EAAE,EAAE;QACX,KAAK,EAAE,IAAI;QACX,WAAW,EAAE,oCAAoC;QACjD,QAAQ,EAAE,CAAC,OAAO,EAAE,SAAS,CAAC;QAC9B,SAAS,EAAE,KAAK;KACjB;IACD;QACE,EAAE,EAAE,YAAY;QAChB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,YAAY;QACnB,OAAO,EAAE,CAAC,gBAAgB,CAAC;QAC3B,KAAK,EAAE,KAAK;QACZ,SAAS,EAAE,IAAI;QACf,WAAW,EAAE,mDAAmD;QAChE,YAAY,EAAE,OAAO;QACrB,YAAY,EAAE,MAAM;QACpB,QAAQ,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC;KAC3B;IACD;QACE,EAAE,EAAE,YAAY;QAChB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,YAAY;QACnB,OAAO,EAAE,CAAC,oBAAoB,CAAC;QAC/B,KAAK,EAAE,KAAK;QACZ,SAAS,EAAE,IAAI;QACf,WAAW,EAAE,6CAA6C;QAC1D,aAAa,EAAE,iBAAiB;QAChC,YAAY,EAAE,wBAAwB;QACtC,QAAQ,EAAE,CAAC,OAAO,EAAE,KAAK,EAAE,eAAe,CAAC;KAC5C;IACD;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,kBAAkB;QACzB,OAAO,EAAE,CAAC,wBAAwB,CAAC;QACnC,KAAK,EAAE,KAAK;QACZ,WAAW,EAAE,qCAAqC;QAClD,QAAQ,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC;KAC3B;IACD;QACE,EAAE,EAAE,cAAc;QAClB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,cAAc;QACrB,OAAO,EAAE,CAAC,mBAAmB,EAAE,uBAAuB,CAAC;QACvD,KAAK,EAAE,KAAK;QACZ,SAAS,EAAE,IAAI;QACf,WAAW,EAAE,qCAAqC;QAClD,QAAQ,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC;KAC3B;IACD;QACE,EAAE,EAAE,kBAAkB;QACtB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,kBAAkB;QACzB,OAAO,EAAE,CAAC,kBAAkB,EAAE,qBAAqB,CAAC;QACpD,KAAK,EAAE,KAAK;QACZ,SAAS,EAAE,IAAI;QACf,WAAW,EAAE,8BAA8B;QAC3C,QAAQ,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC;KAC3B;IACD;QACE,EAAE,EAAE,OAAO;QACX,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,OAAO;QACd,OAAO,EAAE,EAAE;QAC
X,KAAK,EAAE,IAAI;QACX,WAAW,EAAE,8BAA8B;QAC3C,QAAQ,EAAE,CAAC,OAAO,EAAE,SAAS,EAAE,KAAK,CAAC;KACtC;IACD;QACE,EAAE,EAAE,OAAO;QACX,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,YAAY;QACnB,OAAO,EAAE,EAAE;QACX,KAAK,EAAE,IAAI;QACX,SAAS,EAAE,IAAI;QACf,WAAW,EAAE,sCAAsC;QACnD,QAAQ,EAAE,CAAC,OAAO,EAAE,KAAK,EAAE,eAAe,CAAC;QAC3C,SAAS,EAAE,KAAK;KACjB;IACD;QACE,EAAE,EAAE,MAAM;QACV,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,MAAM;QACb,OAAO,EAAE,EAAE;QACX,KAAK,EAAE,IAAI;QACX,WAAW,EAAE,yCAAyC;QACtD,QAAQ,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC;QAC1B,SAAS,EAAE,KAAK;KACjB;IACD;QACE,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,WAAW;QAClB,OAAO,EAAE,EAAE;QACX,KAAK,EAAE,IAAI;QACX,WAAW,EAAE,8CAA8C;QAC3D,QAAQ,EAAE,CAAC,OAAO,EAAE,KAAK,CAAC;QAC1B,SAAS,EAAE,KAAK;KACjB;IACD;QACE,EAAE,EAAE,sBAAsB;QAC1B,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,sBAAsB;QAC7B,OAAO,EAAE,EAAE;QACX,KAAK,EAAE,IAAI;QACX,WAAW,EAAE,4DAA4D;QACzE,QAAQ,EAAE,CAAC,OAAO,EAAE,KAAK,EAAE,UAAU,CAAC;KACvC;IACD;QACE,EAAE,EAAE,WAAW;QACf,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,WAAW;QAClB,OAAO,EAAE,CAAC,sBAAsB,CAAC;QACjC,KAAK,EAAE,IAAI;QACX,WAAW,EAAE,8CAA8C;QAC3D,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC;KACjC;IACD;QACE,EAAE,EAAE,cAAc;QAClB,IAAI,EAAE,WAAW;QACjB,KAAK,EAAE,cAAc;QACrB,OAAO,EAAE,EAAE;QACX,KAAK,EAAE,IAAI;QACX,WAAW,EAAE,uCAAuC;QACpD,QAAQ,EAAE,CAAC,OAAO,EAAE,WAAW,CAAC;KACjC;IACD,2EAA2E;IAC3E;QACE,EAAE,EAAE,oBAAoB;QACxB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,oBAAoB;QAC3B,OAAO,EAAE,CAAC,kBAAkB,CAAC;QAC7B,KAAK,EAAE,KAAK;QACZ,SAAS,EAAE,IAAI;QACf,WAAW,EAAE,iGAAiG;QAC9G,aAAa,EAAE,wBAAwB;QACvC,QAAQ,EAAE,CAAC,WAAW,EAAE,iBAAiB,EAAE,aAAa,EAAE,aAAa,EAAE,aAAa,CAAC;KACxF;IACD;QACE,EAAE,EAAE,iBAAiB;QACrB,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,2BAA2B;QAClC,OAAO,EAAE,CAAC,gBAAgB,CAAC;QAC3B,KAAK,EAAE,KAAK;QACZ,SAAS,EAAE,IAAI;QACf,WAAW,EAAE,8EAA8E;QAC3F,aAAa,EAAE,uBAAuB;QACtC,QAAQ,EAAE,CAAC,WAAW,EAAE,iBAAiB,CAAC;KAC3C;IACD;QACE,EAAE,EAAE,sBAAsB;QAC1B,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,sBAAsB;QAC7B,OAAO,EAAE,CAAC,gBAAgB,CAAC;QAC3B,KAAK,EAAE,KAAK;QACZ,SAAS,EAAE,IAAI;QACf,WAAW,EAAE,wEAAwE;QACrF,aAAa,EAAE,sBAAsB;QACrC,YAAY,EAAE,MAAM;QA
CpB,QAAQ,EAAE,CAAC,WAAW,EAAE,mBAAmB,CAAC;KAC7C;IACD;QACE,EAAE,EAAE,0BAA0B;QAC9B,IAAI,EAAE,KAAK;QACX,KAAK,EAAE,0BAA0B;QACjC,OAAO,EAAE,CAAC,oBAAoB,CAAC;QAC/B,KAAK,EAAE,KAAK;QACZ,SAAS,EAAE,IAAI;QACf,WAAW,EAAE,qEAAqE;QAClF,aAAa,EAAE,0BAA0B;QACzC,QAAQ,EAAE,CAAC,WAAW,EAAE,WAAW,EAAE,oBAAoB,CAAC;KAC3D;CACO,CAAC;AAEX,MAAM,UAAU,wBAAwB,CACtC,IAAyB;IAEzB,IAAI,CAAC,IAAI;QAAE,OAAO,CAAC,GAAG,uBAAuB,CAAC,CAAC;IAC/C,OAAO,uBAAuB,CAAC,MAAM,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,IAAI,KAAK,IAAI,CAAC,CAAC;AACxE,CAAC;AAED,MAAM,UAAU,sBAAsB;IACpC,OAAO,CAAC,WAAW,EAAE,KAAK,EAAE,KAAK,EAAE,KAAK,EAAE,WAAW,CAAC,CAAC;AACzD,CAAC;AAED,MAAM,UAAU,8BAA8B,CAC5C,EAAU;IAEV,OAAO,uBAAuB,CAAC,IAAI,CAAC,CAAC,KAAK,EAAE,EAAE,CAAC,KAAK,CAAC,EAAE,KAAK,EAAE,CAAC,CAAC;AAClE,CAAC;AAED,MAAM,UAAU,0BAA0B,CACxC,KAAiC,EACjC,MAA0C,OAAO,CAAC,GAAG;IAErD,OAAO,KAAK,CAAC,KAAK,IAAI,KAAK,CAAC,OAAO,CAAC,KAAK,CAAC,CAAC,MAAM,EAAE,EAAE,CAAC,OAAO,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,CAAC;AAC9E,CAAC"}
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
import type { SpeechAudioInput, SpeechToTextProvider, SpeechTranscriptionOptions, SpeechTranscriptionResult } from '../types.js';
|
|
2
|
+
/** Configuration for the AssemblyAISTTProvider. */
|
|
3
|
+
export interface AssemblyAISTTProviderConfig {
|
|
4
|
+
/** AssemblyAI API key. */
|
|
5
|
+
apiKey: string;
|
|
6
|
+
/**
|
|
7
|
+
* Custom fetch implementation, useful for testing.
|
|
8
|
+
* Defaults to the global `fetch`.
|
|
9
|
+
*/
|
|
10
|
+
fetchImpl?: typeof fetch;
|
|
11
|
+
}
|
|
12
|
+
/**
|
|
13
|
+
* Speech-to-text provider that uses the AssemblyAI async transcription API.
|
|
14
|
+
*
|
|
15
|
+
* The three-step workflow is:
|
|
16
|
+
* 1. **Upload** – POST the raw audio to `/v2/upload` to obtain an upload URL.
|
|
17
|
+
* 2. **Submit** – POST to `/v2/transcript` with the upload URL to start processing.
|
|
18
|
+
* 3. **Poll** – GET `/v2/transcript/:id` every second until `status` is
|
|
19
|
+
* `completed` or `error`, or until the fixed 120-second timeout elapses.
|
|
20
|
+
*
|
|
21
|
+
* @example
|
|
22
|
+
* ```ts
|
|
23
|
+
* const provider = new AssemblyAISTTProvider({ apiKey: process.env.ASSEMBLYAI_API_KEY! });
|
|
24
|
+
* const result = await provider.transcribe({ data: audioBuffer }, { enableSpeakerDiarization: true });
|
|
25
|
+
* console.log(result.text);
|
|
26
|
+
* ```
|
|
27
|
+
*/
|
|
28
|
+
export declare class AssemblyAISTTProvider implements SpeechToTextProvider {
|
|
29
|
+
private readonly config;
|
|
30
|
+
readonly id = "assemblyai";
|
|
31
|
+
readonly displayName = "AssemblyAI";
|
|
32
|
+
readonly supportsStreaming = false;
|
|
33
|
+
private readonly fetchImpl;
|
|
34
|
+
constructor(config: AssemblyAISTTProviderConfig);
|
|
35
|
+
/** Returns the human-readable provider name. */
|
|
36
|
+
getProviderName(): string;
|
|
37
|
+
/**
|
|
38
|
+
* Transcribes an audio buffer via the AssemblyAI async pipeline.
|
|
39
|
+
*
|
|
40
|
+
* @param audio - Raw audio data and associated metadata.
|
|
41
|
+
* @param options - Optional transcription settings. Pass
|
|
42
|
+
* `providerSpecificOptions.signal` (an {@link AbortSignal}) to cancel.
|
|
43
|
+
* @returns A promise resolving to the normalised transcription result.
|
|
44
|
+
* @throws When the API returns a non-2xx status, when transcription fails,
|
|
45
|
+
* or when the 120-second timeout is exceeded.
|
|
46
|
+
*/
|
|
47
|
+
transcribe(audio: SpeechAudioInput, options?: SpeechTranscriptionOptions): Promise<SpeechTranscriptionResult>;
|
|
48
|
+
}
|
|
49
|
+
//# sourceMappingURL=AssemblyAISTTProvider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AssemblyAISTTProvider.d.ts","sourceRoot":"","sources":["../../../src/speech/providers/AssemblyAISTTProvider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,gBAAgB,EAChB,oBAAoB,EACpB,0BAA0B,EAC1B,yBAAyB,EAE1B,MAAM,aAAa,CAAC;AAErB,mDAAmD;AACnD,MAAM,WAAW,2BAA2B;IAC1C,0BAA0B;IAC1B,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;CAC1B;AAsDD;;;;;;;;;;;;;;;GAeG;AACH,qBAAa,qBAAsB,YAAW,oBAAoB;IAOpD,OAAO,CAAC,QAAQ,CAAC,MAAM;IANnC,SAAgB,EAAE,gBAAgB;IAClC,SAAgB,WAAW,gBAAgB;IAC3C,SAAgB,iBAAiB,SAAS;IAE1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAe;gBAEZ,MAAM,EAAE,2BAA2B;IAIhE,gDAAgD;IAChD,eAAe,IAAI,MAAM;IAIzB;;;;;;;;;OASG;IACG,UAAU,CACd,KAAK,EAAE,gBAAgB,EACvB,OAAO,GAAE,0BAA+B,GACvC,OAAO,CAAC,yBAAyB,CAAC;CAqGtC"}
|
|
@@ -0,0 +1,151 @@
|
|
|
1
|
+
const ASSEMBLYAI_BASE = 'https://api.assemblyai.com/v2';
|
|
2
|
+
/** Maximum time (ms) to wait for a transcript before rejecting. */
|
|
3
|
+
const DEFAULT_TIMEOUT_MS = 120000;
|
|
4
|
+
/** Polling interval (ms) between transcript status checks. */
|
|
5
|
+
const POLL_INTERVAL_MS = 1000;
|
|
6
|
+
/**
|
|
7
|
+
* Maps AssemblyAI word objects to {@link SpeechTranscriptionSegment} entries.
|
|
8
|
+
*
|
|
9
|
+
* Each word becomes its own segment so that per-word timing and speaker
|
|
10
|
+
* attribution are preserved in the normalised result.
|
|
11
|
+
*/
|
|
12
|
+
function wordsToSegments(words) {
|
|
13
|
+
return words.map((w) => ({
|
|
14
|
+
text: w.text,
|
|
15
|
+
startTime: w.start / 1000, // AssemblyAI returns milliseconds
|
|
16
|
+
endTime: w.end / 1000,
|
|
17
|
+
confidence: w.confidence,
|
|
18
|
+
speaker: w.speaker ?? undefined,
|
|
19
|
+
words: [
|
|
20
|
+
{
|
|
21
|
+
word: w.text,
|
|
22
|
+
start: w.start / 1000,
|
|
23
|
+
end: w.end / 1000,
|
|
24
|
+
confidence: w.confidence,
|
|
25
|
+
},
|
|
26
|
+
],
|
|
27
|
+
}));
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* Speech-to-text provider that uses the AssemblyAI async transcription API.
|
|
31
|
+
*
|
|
32
|
+
* The three-step workflow is:
|
|
33
|
+
* 1. **Upload** – POST the raw audio to `/v2/upload` to obtain an upload URL.
|
|
34
|
+
* 2. **Submit** – POST to `/v2/transcript` with the upload URL to start processing.
|
|
35
|
+
* 3. **Poll** – GET `/v2/transcript/:id` every second until `status` is
|
|
36
|
+
* `completed` or `error`, or until the fixed 120-second timeout elapses.
|
|
37
|
+
*
|
|
38
|
+
* @example
|
|
39
|
+
* ```ts
|
|
40
|
+
* const provider = new AssemblyAISTTProvider({ apiKey: process.env.ASSEMBLYAI_API_KEY! });
|
|
41
|
+
* const result = await provider.transcribe({ data: audioBuffer }, { enableSpeakerDiarization: true });
|
|
42
|
+
* console.log(result.text);
|
|
43
|
+
* ```
|
|
44
|
+
*/
|
|
45
|
+
export class AssemblyAISTTProvider {
|
|
46
|
+
constructor(config) {
|
|
47
|
+
this.config = config;
|
|
48
|
+
this.id = 'assemblyai';
|
|
49
|
+
this.displayName = 'AssemblyAI';
|
|
50
|
+
this.supportsStreaming = false;
|
|
51
|
+
this.fetchImpl = config.fetchImpl ?? fetch;
|
|
52
|
+
}
|
|
53
|
+
/** Returns the human-readable provider name. */
|
|
54
|
+
getProviderName() {
|
|
55
|
+
return this.displayName;
|
|
56
|
+
}
|
|
57
|
+
/**
|
|
58
|
+
* Transcribes an audio buffer via the AssemblyAI async pipeline.
|
|
59
|
+
*
|
|
60
|
+
* @param audio - Raw audio data and associated metadata.
|
|
61
|
+
* @param options - Optional transcription settings. Pass
|
|
62
|
+
* `providerSpecificOptions.signal` (an {@link AbortSignal}) to cancel.
|
|
63
|
+
* @returns A promise resolving to the normalised transcription result.
|
|
64
|
+
* @throws When the API returns a non-2xx status, when transcription fails,
|
|
65
|
+
* or when the 120-second timeout is exceeded.
|
|
66
|
+
*/
|
|
67
|
+
async transcribe(audio, options = {}) {
|
|
68
|
+
const signal = options.providerSpecificOptions?.signal;
|
|
69
|
+
const timeoutMs = DEFAULT_TIMEOUT_MS;
|
|
70
|
+
// ── Step 1: Upload audio ────────────────────────────────────────────────
|
|
71
|
+
const uploadResponse = await this.fetchImpl(`${ASSEMBLYAI_BASE}/upload`, {
|
|
72
|
+
method: 'POST',
|
|
73
|
+
headers: {
|
|
74
|
+
Authorization: this.config.apiKey,
|
|
75
|
+
'Content-Type': audio.mimeType ?? 'audio/wav',
|
|
76
|
+
},
|
|
77
|
+
body: audio.data,
|
|
78
|
+
signal,
|
|
79
|
+
});
|
|
80
|
+
if (!uploadResponse.ok) {
|
|
81
|
+
const msg = await uploadResponse.text();
|
|
82
|
+
throw new Error(`AssemblyAI upload failed (${uploadResponse.status}): ${msg}`);
|
|
83
|
+
}
|
|
84
|
+
const { upload_url } = (await uploadResponse.json());
|
|
85
|
+
// ── Step 2: Submit transcript request ───────────────────────────────────
|
|
86
|
+
const submitPayload = {
|
|
87
|
+
audio_url: upload_url,
|
|
88
|
+
speaker_labels: options.enableSpeakerDiarization ?? false,
|
|
89
|
+
};
|
|
90
|
+
if (options.language)
|
|
91
|
+
submitPayload.language_code = options.language;
|
|
92
|
+
const submitResponse = await this.fetchImpl(`${ASSEMBLYAI_BASE}/transcript`, {
|
|
93
|
+
method: 'POST',
|
|
94
|
+
headers: {
|
|
95
|
+
Authorization: this.config.apiKey,
|
|
96
|
+
'Content-Type': 'application/json',
|
|
97
|
+
},
|
|
98
|
+
body: JSON.stringify(submitPayload),
|
|
99
|
+
signal,
|
|
100
|
+
});
|
|
101
|
+
if (!submitResponse.ok) {
|
|
102
|
+
const msg = await submitResponse.text();
|
|
103
|
+
throw new Error(`AssemblyAI transcript submit failed (${submitResponse.status}): ${msg}`);
|
|
104
|
+
}
|
|
105
|
+
const { id } = (await submitResponse.json());
|
|
106
|
+
// ── Step 3: Poll until completed ────────────────────────────────────────
|
|
107
|
+
const deadline = Date.now() + timeoutMs;
|
|
108
|
+
while (true) {
|
|
109
|
+
if (signal?.aborted) {
|
|
110
|
+
throw new Error('AssemblyAI transcription aborted by caller signal');
|
|
111
|
+
}
|
|
112
|
+
if (Date.now() >= deadline) {
|
|
113
|
+
throw new Error(`AssemblyAI transcription timed out after ${timeoutMs / 1000}s (transcript id: ${id})`);
|
|
114
|
+
}
|
|
115
|
+
const pollResponse = await this.fetchImpl(`${ASSEMBLYAI_BASE}/transcript/${id}`, {
|
|
116
|
+
headers: { Authorization: this.config.apiKey },
|
|
117
|
+
signal,
|
|
118
|
+
});
|
|
119
|
+
if (!pollResponse.ok) {
|
|
120
|
+
const msg = await pollResponse.text();
|
|
121
|
+
throw new Error(`AssemblyAI poll failed (${pollResponse.status}): ${msg}`);
|
|
122
|
+
}
|
|
123
|
+
const transcript = (await pollResponse.json());
|
|
124
|
+
if (transcript.status === 'error') {
|
|
125
|
+
throw new Error(`AssemblyAI transcription error: ${transcript.error ?? 'unknown error'}`);
|
|
126
|
+
}
|
|
127
|
+
if (transcript.status === 'completed') {
|
|
128
|
+
const text = transcript.text ?? '';
|
|
129
|
+
const durationSeconds = transcript.audio_duration ?? audio.durationSeconds;
|
|
130
|
+
const words = transcript.words ?? [];
|
|
131
|
+
return {
|
|
132
|
+
text,
|
|
133
|
+
language: transcript.language_code ?? options.language,
|
|
134
|
+
durationSeconds,
|
|
135
|
+
confidence: transcript.confidence ?? undefined,
|
|
136
|
+
cost: 0,
|
|
137
|
+
segments: words.length > 0 ? wordsToSegments(words) : undefined,
|
|
138
|
+
providerResponse: transcript,
|
|
139
|
+
isFinal: true,
|
|
140
|
+
usage: {
|
|
141
|
+
durationMinutes: (durationSeconds ?? 0) / 60,
|
|
142
|
+
modelUsed: 'assemblyai',
|
|
143
|
+
},
|
|
144
|
+
};
|
|
145
|
+
}
|
|
146
|
+
// Still queued or processing — wait before polling again.
|
|
147
|
+
await new Promise((resolve) => setTimeout(resolve, POLL_INTERVAL_MS));
|
|
148
|
+
}
|
|
149
|
+
}
|
|
150
|
+
}
|
|
151
|
+
//# sourceMappingURL=AssemblyAISTTProvider.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AssemblyAISTTProvider.js","sourceRoot":"","sources":["../../../src/speech/providers/AssemblyAISTTProvider.ts"],"names":[],"mappings":"AAyCA,MAAM,eAAe,GAAG,+BAA+B,CAAC;AACxD,mEAAmE;AACnE,MAAM,kBAAkB,GAAG,MAAO,CAAC;AACnC,8DAA8D;AAC9D,MAAM,gBAAgB,GAAG,IAAK,CAAC;AAE/B;;;;;GAKG;AACH,SAAS,eAAe,CAAC,KAAuB;IAC9C,OAAO,KAAK,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC;QACvB,IAAI,EAAE,CAAC,CAAC,IAAI;QACZ,SAAS,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI,EAAE,kCAAkC;QAC7D,OAAO,EAAE,CAAC,CAAC,GAAG,GAAG,IAAI;QACrB,UAAU,EAAE,CAAC,CAAC,UAAU;QACxB,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,SAAS;QAC/B,KAAK,EAAE;YACL;gBACE,IAAI,EAAE,CAAC,CAAC,IAAI;gBACZ,KAAK,EAAE,CAAC,CAAC,KAAK,GAAG,IAAI;gBACrB,GAAG,EAAE,CAAC,CAAC,GAAG,GAAG,IAAI;gBACjB,UAAU,EAAE,CAAC,CAAC,UAAU;aACzB;SACF;KACF,CAAC,CAAC,CAAC;AACN,CAAC;AAED;;;;;;;;;;;;;;;GAeG;AACH,MAAM,OAAO,qBAAqB;IAOhC,YAA6B,MAAmC;QAAnC,WAAM,GAAN,MAAM,CAA6B;QANhD,OAAE,GAAG,YAAY,CAAC;QAClB,gBAAW,GAAG,YAAY,CAAC;QAC3B,sBAAiB,GAAG,KAAK,CAAC;QAKxC,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,KAAK,CAAC;IAC7C,CAAC;IAED,gDAAgD;IAChD,eAAe;QACb,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED;;;;;;;;;OASG;IACH,KAAK,CAAC,UAAU,CACd,KAAuB,EACvB,UAAsC,EAAE;QAExC,MAAM,MAAM,GAAG,OAAO,CAAC,uBAAuB,EAAE,MAAiC,CAAC;QAClF,MAAM,SAAS,GAAG,kBAAkB,CAAC;QAErC,2EAA2E;QAC3E,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,eAAe,SAAS,EAAE;YACvE,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;gBACjC,cAAc,EAAE,KAAK,CAAC,QAAQ,IAAI,WAAW;aAC9C;YACD,IAAI,EAAE,KAAK,CAAC,IAA2B;YACvC,MAAM;SACP,CAAC,CAAC;QAEH,IAAI,CAAC,cAAc,CAAC,EAAE,EAAE,CAAC;YACvB,MAAM,GAAG,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,6BAA6B,cAAc,CAAC,MAAM,MAAM,GAAG,EAAE,CAAC,CAAC;QACjF,CAAC;QAED,MAAM,EAAE,UAAU,EAAE,GAAG,CAAC,MAAM,cAAc,CAAC,IAAI,EAAE,CAA2B,CAAC;QAE/E,2EAA2E;QAC3E,MAAM,aAAa,GAA4B;YAC7C,SAAS,EAAE,UAAU;YACrB,cAAc,EAAE,OAAO,CAAC,wBAAwB,IAAI,KAAK;SAC1D,CAAC;QACF,IAAI,OAAO,CAAC,QAAQ;YAAE,aAAa,CAAC,aAAa,GAAG,OAAO,CAAC,QAAQ,CAAC;QAErE,MAAM,cAAc,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,eAAe,aAAa,EAAE;YAC3E
,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM;gBACjC,cAAc,EAAE,kBAAkB;aACnC;YACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC,aAAa,CAAC;YACnC,MAAM;SACP,CAAC,CAAC;QAEH,IAAI,CAAC,cAAc,CAAC,EAAE,EAAE,CAAC;YACvB,MAAM,GAAG,GAAG,MAAM,cAAc,CAAC,IAAI,EAAE,CAAC;YACxC,MAAM,IAAI,KAAK,CAAC,wCAAwC,cAAc,CAAC,MAAM,MAAM,GAAG,EAAE,CAAC,CAAC;QAC5F,CAAC;QAED,MAAM,EAAE,EAAE,EAAE,GAAG,CAAC,MAAM,cAAc,CAAC,IAAI,EAAE,CAAmB,CAAC;QAE/D,2EAA2E;QAC3E,MAAM,QAAQ,GAAG,IAAI,CAAC,GAAG,EAAE,GAAG,SAAS,CAAC;QAExC,OAAO,IAAI,EAAE,CAAC;YACZ,IAAI,MAAM,EAAE,OAAO,EAAE,CAAC;gBACpB,MAAM,IAAI,KAAK,CAAC,mDAAmD,CAAC,CAAC;YACvE,CAAC;YAED,IAAI,IAAI,CAAC,GAAG,EAAE,IAAI,QAAQ,EAAE,CAAC;gBAC3B,MAAM,IAAI,KAAK,CACb,4CAA4C,SAAS,GAAG,IAAI,qBAAqB,EAAE,GAAG,CACvF,CAAC;YACJ,CAAC;YAED,MAAM,YAAY,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,eAAe,eAAe,EAAE,EAAE,EAAE;gBAC/E,OAAO,EAAE,EAAE,aAAa,EAAE,IAAI,CAAC,MAAM,CAAC,MAAM,EAAE;gBAC9C,MAAM;aACP,CAAC,CAAC;YAEH,IAAI,CAAC,YAAY,CAAC,EAAE,EAAE,CAAC;gBACrB,MAAM,GAAG,GAAG,MAAM,YAAY,CAAC,IAAI,EAAE,CAAC;gBACtC,MAAM,IAAI,KAAK,CAAC,2BAA2B,YAAY,CAAC,MAAM,MAAM,GAAG,EAAE,CAAC,CAAC;YAC7E,CAAC;YAED,MAAM,UAAU,GAAG,CAAC,MAAM,YAAY,CAAC,IAAI,EAAE,CAAyB,CAAC;YAEvE,IAAI,UAAU,CAAC,MAAM,KAAK,OAAO,EAAE,CAAC;gBAClC,MAAM,IAAI,KAAK,CAAC,mCAAmC,UAAU,CAAC,KAAK,IAAI,eAAe,EAAE,CAAC,CAAC;YAC5F,CAAC;YAED,IAAI,UAAU,CAAC,MAAM,KAAK,WAAW,EAAE,CAAC;gBACtC,MAAM,IAAI,GAAG,UAAU,CAAC,IAAI,IAAI,EAAE,CAAC;gBACnC,MAAM,eAAe,GAAG,UAAU,CAAC,cAAc,IAAI,KAAK,CAAC,eAAe,CAAC;gBAC3E,MAAM,KAAK,GAAG,UAAU,CAAC,KAAK,IAAI,EAAE,CAAC;gBAErC,OAAO;oBACL,IAAI;oBACJ,QAAQ,EAAE,UAAU,CAAC,aAAa,IAAI,OAAO,CAAC,QAAQ;oBACtD,eAAe;oBACf,UAAU,EAAE,UAAU,CAAC,UAAU,IAAI,SAAS;oBAC9C,IAAI,EAAE,CAAC;oBACP,QAAQ,EAAE,KAAK,CAAC,MAAM,GAAG,CAAC,CAAC,CAAC,CAAC,eAAe,CAAC,KAAK,CAAC,CAAC,CAAC,CAAC,SAAS;oBAC/D,gBAAgB,EAAE,UAAU;oBAC5B,OAAO,EAAE,IAAI;oBACb,KAAK,EAAE;wBACL,eAAe,EAAE,CAAC,eAAe,IAAI,CAAC,CAAC,GAAG,EAAE;wBAC5C,SAAS,EAAE,YAAY;qBACxB;iBACF,CAAC;YACJ,CAAC;YAED,0DAA0D;YAC1D,MAAM,IAAI,OAAO,CAAO,CAAC,OAAO,EAAE,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,gBAAgB,CAAC,CAAC,CAAC;QA
C9E,CAAC;IACH,CAAC;CACF"}
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
import type { SpeechAudioInput, SpeechToTextProvider, SpeechTranscriptionOptions, SpeechTranscriptionResult } from '../types.js';
|
|
2
|
+
/**
 * Constructor options for the AzureSpeechSTTProvider.
 */
export interface AzureSpeechSTTProviderConfig {
    /** Subscription key of the Azure Cognitive Services resource. */
    key: string;
    /** Region the resource lives in, e.g. `'eastus'` or `'westeurope'`. */
    region: string;
    /**
     * Replacement for `fetch`, mainly intended for tests.
     * The global `fetch` is used when this is left out.
     */
    fetchImpl?: typeof fetch;
}
|
|
14
|
+
/**
 * Speech-to-text provider built on the Azure Cognitive Services Speech
 * REST API.
 *
 * The audio is posted as a raw binary body and the reply is normalised into
 * a {@link SpeechTranscriptionResult}. When Azure reports a
 * `RecognitionStatus` of `'NoMatch'` the provider resolves with empty text
 * instead of rejecting.
 *
 * @example
 * ```ts
 * const provider = new AzureSpeechSTTProvider({ key: process.env.AZURE_SPEECH_KEY!, region: 'eastus' });
 * const result = await provider.transcribe({ data: wavBuffer });
 * console.log(result.text);
 * ```
 */
export declare class AzureSpeechSTTProvider implements SpeechToTextProvider {
    private readonly config;
    readonly id = "azure-speech-stt";
    readonly displayName = "Azure Speech (STT)";
    readonly supportsStreaming = false;
    private readonly fetchImpl;
    constructor(config: AzureSpeechSTTProviderConfig);
    /** Returns the human-readable provider name. */
    getProviderName(): string;
    /**
     * Sends one audio buffer to the Azure Speech recognition REST endpoint
     * and returns the transcription.
     *
     * @param audio - Raw audio data. Azure expects PCM WAV; pass `mimeType: 'audio/wav'`.
     * @param options - Optional transcription settings (language…).
     * @returns A promise resolving to the normalised transcription result.
     * @throws When the Azure API answers with a non-2xx status.
     */
    transcribe(audio: SpeechAudioInput, options?: SpeechTranscriptionOptions): Promise<SpeechTranscriptionResult>;
}
|
|
48
|
+
//# sourceMappingURL=AzureSpeechSTTProvider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AzureSpeechSTTProvider.d.ts","sourceRoot":"","sources":["../../../src/speech/providers/AzureSpeechSTTProvider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,gBAAgB,EAChB,oBAAoB,EACpB,0BAA0B,EAC1B,yBAAyB,EAC1B,MAAM,aAAa,CAAC;AAErB,oDAAoD;AACpD,MAAM,WAAW,4BAA4B;IAC3C,iDAAiD;IACjD,GAAG,EAAE,MAAM,CAAC;IACZ,uDAAuD;IACvD,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;CAC1B;AAoBD;;;;;;;;;;;;;;GAcG;AACH,qBAAa,sBAAuB,YAAW,oBAAoB;IAOrD,OAAO,CAAC,QAAQ,CAAC,MAAM;IANnC,SAAgB,EAAE,sBAAsB;IACxC,SAAgB,WAAW,wBAAwB;IACnD,SAAgB,iBAAiB,SAAS;IAE1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAe;gBAEZ,MAAM,EAAE,4BAA4B;IAIjE,gDAAgD;IAChD,eAAe,IAAI,MAAM;IAIzB;;;;;;;OAOG;IACG,UAAU,CACd,KAAK,EAAE,gBAAgB,EACvB,OAAO,GAAE,0BAA+B,GACvC,OAAO,CAAC,yBAAyB,CAAC;CA0DtC"}
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
/**
 * Converts a duration expressed in Azure ticks (100-nanosecond units)
 * into seconds.
 *
 * @param ticks - Duration in 100-nanosecond ticks.
 * @returns The same duration in seconds.
 */
function ticksToSeconds(ticks) {
    // 1 second = 10,000,000 ticks of 100 ns each.
    const ticksPerSecond = 1e7;
    return ticks / ticksPerSecond;
}
|
|
5
|
+
/**
|
|
6
|
+
* Speech-to-text provider that uses the Azure Cognitive Services Speech REST API.
|
|
7
|
+
*
|
|
8
|
+
* Sends WAV audio as a raw binary body and returns a normalised
|
|
9
|
+
* {@link SpeechTranscriptionResult}. A `RecognitionStatus` of `'NoMatch'`
|
|
10
|
+
* is mapped to an empty text result rather than an error, matching the
|
|
11
|
+
* Azure SDK behaviour.
|
|
12
|
+
*
|
|
13
|
+
* @example
|
|
14
|
+
* ```ts
|
|
15
|
+
* const provider = new AzureSpeechSTTProvider({ key: process.env.AZURE_SPEECH_KEY!, region: 'eastus' });
|
|
16
|
+
* const result = await provider.transcribe({ data: wavBuffer });
|
|
17
|
+
* console.log(result.text);
|
|
18
|
+
* ```
|
|
19
|
+
*/
|
|
20
|
+
export class AzureSpeechSTTProvider {
|
|
21
|
+
constructor(config) {
|
|
22
|
+
this.config = config;
|
|
23
|
+
this.id = 'azure-speech-stt';
|
|
24
|
+
this.displayName = 'Azure Speech (STT)';
|
|
25
|
+
this.supportsStreaming = false;
|
|
26
|
+
this.fetchImpl = config.fetchImpl ?? fetch;
|
|
27
|
+
}
|
|
28
|
+
/** Returns the human-readable provider name. */
|
|
29
|
+
getProviderName() {
|
|
30
|
+
return this.displayName;
|
|
31
|
+
}
|
|
32
|
+
/**
|
|
33
|
+
* Transcribes an audio buffer using the Azure Speech recognition REST endpoint.
|
|
34
|
+
*
|
|
35
|
+
* @param audio - Raw audio data. Azure expects PCM WAV; pass `mimeType: 'audio/wav'`.
|
|
36
|
+
* @param options - Optional transcription settings (language…).
|
|
37
|
+
* @returns A promise resolving to the normalised transcription result.
|
|
38
|
+
* @throws When the Azure API returns a non-2xx status.
|
|
39
|
+
*/
|
|
40
|
+
async transcribe(audio, options = {}) {
|
|
41
|
+
const lang = options.language ?? 'en-US';
|
|
42
|
+
const { key, region } = this.config;
|
|
43
|
+
const url = `https://${region}.stt.speech.microsoft.com` +
|
|
44
|
+
`/speech/recognition/conversation/cognitiveservices/v1` +
|
|
45
|
+
`?language=${encodeURIComponent(lang)}`;
|
|
46
|
+
const response = await this.fetchImpl(url, {
|
|
47
|
+
method: 'POST',
|
|
48
|
+
headers: {
|
|
49
|
+
'Ocp-Apim-Subscription-Key': key,
|
|
50
|
+
'Content-Type': 'audio/wav',
|
|
51
|
+
},
|
|
52
|
+
body: audio.data,
|
|
53
|
+
});
|
|
54
|
+
if (!response.ok) {
|
|
55
|
+
const message = await response.text();
|
|
56
|
+
throw new Error(`Azure Speech STT failed (${response.status}): ${message}`);
|
|
57
|
+
}
|
|
58
|
+
const payload = (await response.json());
|
|
59
|
+
// NoMatch means the recognizer found no speech — return empty text gracefully.
|
|
60
|
+
if (payload.RecognitionStatus === 'NoMatch') {
|
|
61
|
+
return {
|
|
62
|
+
text: '',
|
|
63
|
+
language: lang,
|
|
64
|
+
cost: 0,
|
|
65
|
+
isFinal: true,
|
|
66
|
+
providerResponse: payload,
|
|
67
|
+
usage: {
|
|
68
|
+
durationMinutes: (audio.durationSeconds ?? 0) / 60,
|
|
69
|
+
modelUsed: 'azure-speech-stt',
|
|
70
|
+
},
|
|
71
|
+
};
|
|
72
|
+
}
|
|
73
|
+
const durationSeconds = typeof payload.Duration === 'number'
|
|
74
|
+
? ticksToSeconds(payload.Duration)
|
|
75
|
+
: audio.durationSeconds;
|
|
76
|
+
return {
|
|
77
|
+
text: payload.DisplayText ?? '',
|
|
78
|
+
language: lang,
|
|
79
|
+
durationSeconds,
|
|
80
|
+
cost: 0,
|
|
81
|
+
providerResponse: payload,
|
|
82
|
+
isFinal: true,
|
|
83
|
+
usage: {
|
|
84
|
+
durationMinutes: (durationSeconds ?? 0) / 60,
|
|
85
|
+
modelUsed: 'azure-speech-stt',
|
|
86
|
+
},
|
|
87
|
+
};
|
|
88
|
+
}
|
|
89
|
+
}
|
|
90
|
+
//# sourceMappingURL=AzureSpeechSTTProvider.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AzureSpeechSTTProvider.js","sourceRoot":"","sources":["../../../src/speech/providers/AzureSpeechSTTProvider.ts"],"names":[],"mappings":"AAiCA,sDAAsD;AACtD,SAAS,cAAc,CAAC,KAAa;IACnC,OAAO,KAAK,GAAG,QAAU,CAAC;AAC5B,CAAC;AAED;;;;;;;;;;;;;;GAcG;AACH,MAAM,OAAO,sBAAsB;IAOjC,YAA6B,MAAoC;QAApC,WAAM,GAAN,MAAM,CAA8B;QANjD,OAAE,GAAG,kBAAkB,CAAC;QACxB,gBAAW,GAAG,oBAAoB,CAAC;QACnC,sBAAiB,GAAG,KAAK,CAAC;QAKxC,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,KAAK,CAAC;IAC7C,CAAC;IAED,gDAAgD;IAChD,eAAe;QACb,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU,CACd,KAAuB,EACvB,UAAsC,EAAE;QAExC,MAAM,IAAI,GAAG,OAAO,CAAC,QAAQ,IAAI,OAAO,CAAC;QACzC,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC;QAEpC,MAAM,GAAG,GACP,WAAW,MAAM,2BAA2B;YAC5C,uDAAuD;YACvD,aAAa,kBAAkB,CAAC,IAAI,CAAC,EAAE,CAAC;QAE1C,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE;YACzC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,2BAA2B,EAAE,GAAG;gBAChC,cAAc,EAAE,WAAW;aAC5B;YACD,IAAI,EAAE,KAAK,CAAC,IAA2B;SACxC,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,CAAC,MAAM,MAAM,OAAO,EAAE,CAAC,CAAC;QAC9E,CAAC;QAED,MAAM,OAAO,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAwB,CAAC;QAE/D,+EAA+E;QAC/E,IAAI,OAAO,CAAC,iBAAiB,KAAK,SAAS,EAAE,CAAC;YAC5C,OAAO;gBACL,IAAI,EAAE,EAAE;gBACR,QAAQ,EAAE,IAAI;gBACd,IAAI,EAAE,CAAC;gBACP,OAAO,EAAE,IAAI;gBACb,gBAAgB,EAAE,OAAO;gBACzB,KAAK,EAAE;oBACL,eAAe,EAAE,CAAC,KAAK,CAAC,eAAe,IAAI,CAAC,CAAC,GAAG,EAAE;oBAClD,SAAS,EAAE,kBAAkB;iBAC9B;aACF,CAAC;QACJ,CAAC;QAED,MAAM,eAAe,GACnB,OAAO,OAAO,CAAC,QAAQ,KAAK,QAAQ;YAClC,CAAC,CAAC,cAAc,CAAC,OAAO,CAAC,QAAQ,CAAC;YAClC,CAAC,CAAC,KAAK,CAAC,eAAe,CAAC;QAE5B,OAAO;YACL,IAAI,EAAE,OAAO,CAAC,WAAW,IAAI,EAAE;YAC/B,QAAQ,EAAE,IAAI;YACd,eAAe;YACf,IAAI,EAAE,CAAC;YACP,gBAAgB,EAAE,OAAO;YACzB,OAAO,EAAE,IAAI;YACb,KAAK,EAAE;gBACL,eAAe,EAAE,CAAC,eAAe,IAAI,CAAC,CAAC,GAAG,EAAE;gBAC5C,SAAS,EAAE,kBAAkB;aAC9B;SACF,CAAC;IACJ,CAAC;CACF"}
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
import type { SpeechSynthesisOptions, SpeechSynthesisResult, SpeechVoice, TextToSpeechProvider } from '../types.js';
|
|
2
|
+
/**
 * Constructor options for the AzureSpeechTTSProvider.
 */
export interface AzureSpeechTTSProviderConfig {
    /** Subscription key of the Azure Cognitive Services resource. */
    key: string;
    /** Region the resource lives in, e.g. `'eastus'` or `'westeurope'`. */
    region: string;
    /**
     * Voice name used when a request does not name one itself.
     * @default 'en-US-JennyNeural'
     */
    defaultVoice?: string;
    /**
     * Replacement for `fetch`, mainly intended for tests.
     * The global `fetch` is used when this is left out.
     */
    fetchImpl?: typeof fetch;
}
|
|
19
|
+
/**
 * Text-to-speech provider built on the Azure Cognitive Services Speech
 * REST API.
 *
 * Audio is produced through SSML synthesis and handed back as a raw MP3
 * buffer. `supportsStreaming` is `true` in the sense that the provider can
 * be used inside a streaming pipeline; each synthesis is still one complete
 * HTTP request whose whole body is read before the result resolves.
 *
 * @example
 * ```ts
 * const provider = new AzureSpeechTTSProvider({ key: process.env.AZURE_SPEECH_KEY!, region: 'eastus' });
 * const result = await provider.synthesize('Hello world');
 * // result.audioBuffer contains MP3 bytes
 * ```
 */
export declare class AzureSpeechTTSProvider implements TextToSpeechProvider {
    private readonly config;
    readonly id = "azure-speech-tts";
    readonly displayName = "Azure Speech (TTS)";
    readonly supportsStreaming = true;
    private readonly fetchImpl;
    private readonly defaultVoice;
    constructor(config: AzureSpeechTTSProviderConfig);
    /** Returns the human-readable provider name. */
    getProviderName(): string;
    /**
     * Turns plain text into speech through the Azure TTS REST endpoint.
     *
     * @param text - The utterance to convert to audio.
     * @param options - Optional synthesis settings (voice override…).
     * @returns A promise resolving to the MP3 audio buffer and metadata.
     * @throws When the Azure API answers with a non-2xx status.
     */
    synthesize(text: string, options?: SpeechSynthesisOptions): Promise<SpeechSynthesisResult>;
    /**
     * Fetches the voices offered in the configured Azure region.
     *
     * @returns A promise resolving to an array of normalised {@link SpeechVoice} entries.
     * @throws When the Azure API answers with a non-2xx status.
     */
    listAvailableVoices(): Promise<SpeechVoice[]>;
}
|
|
60
|
+
//# sourceMappingURL=AzureSpeechTTSProvider.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AzureSpeechTTSProvider.d.ts","sourceRoot":"","sources":["../../../src/speech/providers/AzureSpeechTTSProvider.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EACV,sBAAsB,EACtB,qBAAqB,EACrB,WAAW,EACX,oBAAoB,EACrB,MAAM,aAAa,CAAC;AAErB,oDAAoD;AACpD,MAAM,WAAW,4BAA4B;IAC3C,iDAAiD;IACjD,GAAG,EAAE,MAAM,CAAC;IACZ,uDAAuD;IACvD,MAAM,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,YAAY,CAAC,EAAE,MAAM,CAAC;IACtB;;;OAGG;IACH,SAAS,CAAC,EAAE,OAAO,KAAK,CAAC;CAC1B;AAyDD;;;;;;;;;;;;;GAaG;AACH,qBAAa,sBAAuB,YAAW,oBAAoB;IAQrD,OAAO,CAAC,QAAQ,CAAC,MAAM;IAPnC,SAAgB,EAAE,sBAAsB;IACxC,SAAgB,WAAW,wBAAwB;IACnD,SAAgB,iBAAiB,QAAQ;IAEzC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAe;IACzC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAS;gBAET,MAAM,EAAE,4BAA4B;IAKjE,gDAAgD;IAChD,eAAe,IAAI,MAAM;IAIzB;;;;;;;OAOG;IACG,UAAU,CACd,IAAI,EAAE,MAAM,EACZ,OAAO,GAAE,sBAA2B,GACnC,OAAO,CAAC,qBAAqB,CAAC;IAsCjC;;;;;OAKG;IACG,mBAAmB,IAAI,OAAO,CAAC,WAAW,EAAE,CAAC;CAgBpD"}
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
/**
 * Escapes special XML characters in text before embedding it in SSML.
 * Azure's TTS endpoint expects well-formed XML; unescaped `<`, `>`, or `&`
 * characters in the input text would cause a 400 error.
 *
 * @param text - Plain text to make safe for XML element or attribute content.
 * @returns The text with `&`, `<`, `>`, `"` and `'` replaced by entities.
 */
function escapeXml(text) {
    return text
        // `&` must be handled first so the entities produced below are not
        // themselves re-escaped.
        .replace(/&/g, '&amp;')
        .replace(/</g, '&lt;')
        .replace(/>/g, '&gt;')
        .replace(/"/g, '&quot;')
        .replace(/'/g, '&apos;');
}
|
|
14
|
+
/**
|
|
15
|
+
* Builds the SSML payload sent to the Azure TTS REST endpoint.
|
|
16
|
+
*
|
|
17
|
+
* @param text - Plain-text utterance to synthesize.
|
|
18
|
+
* @param voice - Azure voice short-name, e.g. `'en-US-JennyNeural'`.
|
|
19
|
+
*/
|
|
20
|
+
function buildSsml(text, voice) {
|
|
21
|
+
return (`<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xml:lang="en-US">` +
|
|
22
|
+
`<voice name="${voice}">${escapeXml(text)}</voice>` +
|
|
23
|
+
`</speak>`);
|
|
24
|
+
}
|
|
25
|
+
/**
 * Converts one entry of the Azure voice list into the normalised
 * {@link SpeechVoice} shape.
 *
 * @param entry - Voice list entry; its `ShortName`, `DisplayName`, `Gender`
 *   and `LocaleName` fields are read.
 * @returns The voice descriptor, tagged with provider `'azure-speech-tts'`.
 */
function mapVoice(entry) {
    // Lower-cased as reported; any value (including `undefined` when the
    // entry has no `Gender`) is passed through unchanged.
    const normalisedGender = entry.Gender?.toLowerCase();
    const voice = {
        id: entry.ShortName,
        name: entry.DisplayName,
        gender: normalisedGender,
        // NOTE(review): `LocaleName` is used here, not a locale code field — confirm
        // this is what consumers of `lang` expect.
        lang: entry.LocaleName,
        provider: 'azure-speech-tts',
    };
    return voice;
}
|
|
40
|
+
/**
|
|
41
|
+
* Text-to-speech provider that uses the Azure Cognitive Services Speech REST API.
|
|
42
|
+
*
|
|
43
|
+
* Generates audio via SSML synthesis and returns the raw MP3 buffer. Streaming
|
|
44
|
+
* is supported in the sense that the provider can be used inside a streaming
|
|
45
|
+
* pipeline — the actual HTTP request is a single synchronous call.
|
|
46
|
+
*
|
|
47
|
+
* @example
|
|
48
|
+
* ```ts
|
|
49
|
+
* const provider = new AzureSpeechTTSProvider({ key: process.env.AZURE_SPEECH_KEY!, region: 'eastus' });
|
|
50
|
+
* const result = await provider.synthesize('Hello world');
|
|
51
|
+
* // result.audioBuffer contains MP3 bytes
|
|
52
|
+
* ```
|
|
53
|
+
*/
|
|
54
|
+
export class AzureSpeechTTSProvider {
|
|
55
|
+
constructor(config) {
|
|
56
|
+
this.config = config;
|
|
57
|
+
this.id = 'azure-speech-tts';
|
|
58
|
+
this.displayName = 'Azure Speech (TTS)';
|
|
59
|
+
this.supportsStreaming = true;
|
|
60
|
+
this.fetchImpl = config.fetchImpl ?? fetch;
|
|
61
|
+
this.defaultVoice = config.defaultVoice ?? 'en-US-JennyNeural';
|
|
62
|
+
}
|
|
63
|
+
/** Returns the human-readable provider name. */
|
|
64
|
+
getProviderName() {
|
|
65
|
+
return this.displayName;
|
|
66
|
+
}
|
|
67
|
+
/**
|
|
68
|
+
* Synthesizes speech from plain text using the Azure TTS REST endpoint.
|
|
69
|
+
*
|
|
70
|
+
* @param text - The utterance to convert to audio.
|
|
71
|
+
* @param options - Optional synthesis settings (voice override…).
|
|
72
|
+
* @returns A promise resolving to the MP3 audio buffer and metadata.
|
|
73
|
+
* @throws When the Azure API returns a non-2xx status.
|
|
74
|
+
*/
|
|
75
|
+
async synthesize(text, options = {}) {
|
|
76
|
+
const voice = options.voice ?? this.defaultVoice;
|
|
77
|
+
const { key, region } = this.config;
|
|
78
|
+
const url = `https://${region}.tts.speech.microsoft.com/cognitiveservices/v1`;
|
|
79
|
+
const ssml = buildSsml(text, voice);
|
|
80
|
+
const response = await this.fetchImpl(url, {
|
|
81
|
+
method: 'POST',
|
|
82
|
+
headers: {
|
|
83
|
+
'Ocp-Apim-Subscription-Key': key,
|
|
84
|
+
'Content-Type': 'application/ssml+xml',
|
|
85
|
+
'X-Microsoft-OutputFormat': 'audio-24khz-96kbitrate-mono-mp3',
|
|
86
|
+
},
|
|
87
|
+
body: ssml,
|
|
88
|
+
});
|
|
89
|
+
if (!response.ok) {
|
|
90
|
+
const message = await response.text();
|
|
91
|
+
throw new Error(`Azure Speech TTS failed (${response.status}): ${message}`);
|
|
92
|
+
}
|
|
93
|
+
const arrayBuffer = await response.arrayBuffer();
|
|
94
|
+
const audioBuffer = Buffer.from(arrayBuffer);
|
|
95
|
+
return {
|
|
96
|
+
audioBuffer,
|
|
97
|
+
mimeType: 'audio/mpeg',
|
|
98
|
+
cost: 0,
|
|
99
|
+
voiceUsed: voice,
|
|
100
|
+
providerName: this.displayName,
|
|
101
|
+
usage: {
|
|
102
|
+
characters: text.length,
|
|
103
|
+
modelUsed: 'azure-speech-tts',
|
|
104
|
+
},
|
|
105
|
+
};
|
|
106
|
+
}
|
|
107
|
+
/**
|
|
108
|
+
* Retrieves the list of available neural voices from the Azure region.
|
|
109
|
+
*
|
|
110
|
+
* @returns A promise resolving to an array of normalised {@link SpeechVoice} entries.
|
|
111
|
+
* @throws When the Azure API returns a non-2xx status.
|
|
112
|
+
*/
|
|
113
|
+
async listAvailableVoices() {
|
|
114
|
+
const { key, region } = this.config;
|
|
115
|
+
const url = `https://${region}.tts.speech.microsoft.com/cognitiveservices/voices/list`;
|
|
116
|
+
const response = await this.fetchImpl(url, {
|
|
117
|
+
headers: { 'Ocp-Apim-Subscription-Key': key },
|
|
118
|
+
});
|
|
119
|
+
if (!response.ok) {
|
|
120
|
+
const message = await response.text();
|
|
121
|
+
throw new Error(`Azure Speech voice list failed (${response.status}): ${message}`);
|
|
122
|
+
}
|
|
123
|
+
const voices = (await response.json());
|
|
124
|
+
return voices.map(mapVoice);
|
|
125
|
+
}
|
|
126
|
+
}
|
|
127
|
+
//# sourceMappingURL=AzureSpeechTTSProvider.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"AzureSpeechTTSProvider.js","sourceRoot":"","sources":["../../../src/speech/providers/AzureSpeechTTSProvider.ts"],"names":[],"mappings":"AAmCA;;;;GAIG;AACH,SAAS,SAAS,CAAC,IAAY;IAC7B,OAAO,IAAI;SACR,OAAO,CAAC,IAAI,EAAE,OAAO,CAAC;SACtB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,MAAM,CAAC;SACrB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC;SACvB,OAAO,CAAC,IAAI,EAAE,QAAQ,CAAC,CAAC;AAC7B,CAAC;AAED;;;;;GAKG;AACH,SAAS,SAAS,CAAC,IAAY,EAAE,KAAa;IAC5C,OAAO,CACL,oFAAoF;QACpF,gBAAgB,KAAK,KAAK,SAAS,CAAC,IAAI,CAAC,UAAU;QACnD,UAAU,CACX,CAAC;AACJ,CAAC;AAED;;GAEG;AACH,SAAS,QAAQ,CAAC,KAAsB;IACtC,MAAM,MAAM,GAAG,KAAK,CAAC,MAAM,EAAE,WAAW,EAAE,CAAC;IAC3C,OAAO;QACL,EAAE,EAAE,KAAK,CAAC,SAAS;QACnB,IAAI,EAAE,KAAK,CAAC,WAAW;QACvB,MAAM,EACJ,MAAM,KAAK,MAAM,IAAI,MAAM,KAAK,QAAQ,IAAI,MAAM,KAAK,SAAS;YAC9D,CAAC,CAAE,MAAwC;YAC3C,CAAC,CAAC,MAAM;QACZ,IAAI,EAAE,KAAK,CAAC,UAAU;QACtB,QAAQ,EAAE,kBAAkB;KAC7B,CAAC;AACJ,CAAC;AAED;;;;;;;;;;;;;GAaG;AACH,MAAM,OAAO,sBAAsB;IAQjC,YAA6B,MAAoC;QAApC,WAAM,GAAN,MAAM,CAA8B;QAPjD,OAAE,GAAG,kBAAkB,CAAC;QACxB,gBAAW,GAAG,oBAAoB,CAAC;QACnC,sBAAiB,GAAG,IAAI,CAAC;QAMvC,IAAI,CAAC,SAAS,GAAG,MAAM,CAAC,SAAS,IAAI,KAAK,CAAC;QAC3C,IAAI,CAAC,YAAY,GAAG,MAAM,CAAC,YAAY,IAAI,mBAAmB,CAAC;IACjE,CAAC;IAED,gDAAgD;IAChD,eAAe;QACb,OAAO,IAAI,CAAC,WAAW,CAAC;IAC1B,CAAC;IAED;;;;;;;OAOG;IACH,KAAK,CAAC,UAAU,CACd,IAAY,EACZ,UAAkC,EAAE;QAEpC,MAAM,KAAK,GAAG,OAAO,CAAC,KAAK,IAAI,IAAI,CAAC,YAAY,CAAC;QACjD,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC;QAEpC,MAAM,GAAG,GAAG,WAAW,MAAM,gDAAgD,CAAC;QAC9E,MAAM,IAAI,GAAG,SAAS,CAAC,IAAI,EAAE,KAAK,CAAC,CAAC;QAEpC,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE;YACzC,MAAM,EAAE,MAAM;YACd,OAAO,EAAE;gBACP,2BAA2B,EAAE,GAAG;gBAChC,cAAc,EAAE,sBAAsB;gBACtC,0BAA0B,EAAE,iCAAiC;aAC9D;YACD,IAAI,EAAE,IAAI;SACX,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,4BAA4B,QAAQ,CAAC,MAAM,MAAM,OAAO,EAAE,CAAC,CAAC;QAC9E,CAAC;QAED,MAAM,WAAW,GAAG,MAAM,QAAQ,CAAC,WAAW,EAAE,CAAC;QACjD,MAAM,W
AAW,GAAG,MAAM,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QAE7C,OAAO;YACL,WAAW;YACX,QAAQ,EAAE,YAAY;YACtB,IAAI,EAAE,CAAC;YACP,SAAS,EAAE,KAAK;YAChB,YAAY,EAAE,IAAI,CAAC,WAAW;YAC9B,KAAK,EAAE;gBACL,UAAU,EAAE,IAAI,CAAC,MAAM;gBACvB,SAAS,EAAE,kBAAkB;aAC9B;SACF,CAAC;IACJ,CAAC;IAED;;;;;OAKG;IACH,KAAK,CAAC,mBAAmB;QACvB,MAAM,EAAE,GAAG,EAAE,MAAM,EAAE,GAAG,IAAI,CAAC,MAAM,CAAC;QACpC,MAAM,GAAG,GAAG,WAAW,MAAM,yDAAyD,CAAC;QAEvF,MAAM,QAAQ,GAAG,MAAM,IAAI,CAAC,SAAS,CAAC,GAAG,EAAE;YACzC,OAAO,EAAE,EAAE,2BAA2B,EAAE,GAAG,EAAE;SAC9C,CAAC,CAAC;QAEH,IAAI,CAAC,QAAQ,CAAC,EAAE,EAAE,CAAC;YACjB,MAAM,OAAO,GAAG,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAC;YACtC,MAAM,IAAI,KAAK,CAAC,mCAAmC,QAAQ,CAAC,MAAM,MAAM,OAAO,EAAE,CAAC,CAAC;QACrF,CAAC;QAED,MAAM,MAAM,GAAG,CAAC,MAAM,QAAQ,CAAC,IAAI,EAAE,CAAsB,CAAC;QAC5D,OAAO,MAAM,CAAC,GAAG,CAAC,QAAQ,CAAC,CAAC;IAC9B,CAAC;CACF"}
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import type { SpeechAudioInput, SpeechToTextProvider, SpeechTranscriptionOptions, SpeechTranscriptionResult } from '../types.js';
|
|
2
|
+
/**
 * Constructor options for the DeepgramBatchSTTProvider.
 */
export interface DeepgramBatchSTTProviderConfig {
    /** API key used to authenticate against Deepgram. */
    apiKey: string;
    /**
     * Name of the Deepgram model that performs the transcription.
     * @default 'nova-2'
     */
    model?: string;
    /**
     * BCP-47 language code, e.g. `'en-US'`.
     * When omitted Deepgram uses automatic language detection.
     */
    language?: string;
    /**
     * Replacement for `fetch`, mainly intended for tests.
     * The global `fetch` is used when this is left out.
     */
    fetchImpl?: typeof fetch;
}
|
|
22
|
+
/**
 * Speech-to-text provider built on the Deepgram batch (pre-recorded) REST API.
 *
 * The audio is posted as a raw binary body and the reply is normalised into
 * a {@link SpeechTranscriptionResult}. Streaming is not supported
 * (`supportsStreaming` is `false`) — use a Deepgram streaming adapter for
 * real-time transcription.
 *
 * @example
 * ```ts
 * const provider = new DeepgramBatchSTTProvider({ apiKey: process.env.DEEPGRAM_API_KEY! });
 * const result = await provider.transcribe({ data: audioBuffer, mimeType: 'audio/wav' });
 * console.log(result.text);
 * ```
 */
export declare class DeepgramBatchSTTProvider implements SpeechToTextProvider {
    private readonly config;
    readonly id = "deepgram-batch";
    readonly displayName = "Deepgram (Batch)";
    readonly supportsStreaming = false;
    private readonly fetchImpl;
    constructor(config: DeepgramBatchSTTProviderConfig);
    /** Returns the human-readable provider name. */
    getProviderName(): string;
    /**
     * Sends one audio buffer to the Deepgram pre-recorded API and returns
     * the transcription.
     *
     * @param audio - Raw audio data and associated metadata.
     * @param options - Optional transcription settings (language, diarization…).
     * @returns A promise resolving to the normalised transcription result.
     * @throws When the Deepgram API answers with a non-2xx status.
     */
    transcribe(audio: SpeechAudioInput, options?: SpeechTranscriptionOptions): Promise<SpeechTranscriptionResult>;
}
|
|
55
|
+
//# sourceMappingURL=DeepgramBatchSTTProvider.d.ts.map
|