@bestcodes/edge-tts 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +115 -0
- package/dist/MsEdgeTTS.js +1 -0
- package/dist/OUTPUT_FORMAT.js +1 -0
- package/dist/PITCH.js +1 -0
- package/dist/RATE.js +1 -0
- package/dist/VOLUME.js +1 -0
- package/dist/index.js +1 -0
- package/dist/types/MsEdgeTTS.d.ts +141 -0
- package/dist/types/OUTPUT_FORMAT.d.ts +8 -0
- package/dist/types/PITCH.d.ts +11 -0
- package/dist/types/RATE.d.ts +11 -0
- package/dist/types/VOLUME.d.ts +12 -0
- package/dist/types/index.d.ts +5 -0
- package/package.json +50 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2024 The-Best-Codes
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# @bestcodes/edge-tts
|
|
2
|
+
|
|
3
|
+
[](https://badge.fury.io/js/%40bestcodes%2Fedge-tts)
|
|
4
|
+
|
|
5
|
+
## This is a fork.
|
|
6
|
+
|
|
7
|
+
The original version, here:
|
|
8
|
+
https://github.com/Migushthe2nd/MsEdgeTTS
|
|
9
|
+
Was undermaintained and had dependency issues, so I forked the repo and fixed them.
|
|
10
|
+
|
|
11
|
+
A simple Azure Speech Service module that uses the Microsoft Edge Read Aloud API.
|
|
12
|
+
|
|
13
|
+
Only supports `speak`, `voice`, and `prosody` element types. The following is the default SSML object:
|
|
14
|
+
|
|
15
|
+
```xml
|
|
16
|
+
<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts"
|
|
17
|
+
xml:lang="${this._voiceLang}">
|
|
18
|
+
<voice name="${voiceName}">
|
|
19
|
+
<prosody rate="${rate}" pitch="${pitch}" volume="${volume}">
|
|
20
|
+
${input}
|
|
21
|
+
</prosody>
|
|
22
|
+
</voice>
|
|
23
|
+
</speak>
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Documentation on the SSML
|
|
27
|
+
format [can be found here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-synthesis-markup)
|
|
28
|
+
. All supported audio formats [can be found here](./src/OUTPUT_FORMAT.ts).
|
|
29
|
+
|
|
30
|
+
## Example usage
|
|
31
|
+
|
|
32
|
+
Make sure to **escape/sanitize** your user's input!
|
|
33
|
+
Use a library like [xml-escape](https://www.npmjs.com/package/xml-escape).
|
|
34
|
+
|
|
35
|
+
### Write to stream
|
|
36
|
+
|
|
37
|
+
```js
|
|
38
|
+
import { MsEdgeTTS, OUTPUT_FORMAT } from "@bestcodes/edge-tts";
|
|
39
|
+
|
|
40
|
+
const tts = new MsEdgeTTS();
|
|
41
|
+
await tts.setMetadata(
|
|
42
|
+
"en-IE-ConnorNeural",
|
|
43
|
+
OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
|
|
44
|
+
);
|
|
45
|
+
const readable = tts.toStream("Hi, how are you?");
|
|
46
|
+
|
|
47
|
+
readable.on("data", (data) => {
|
|
48
|
+
console.log("DATA RECEIVED", data);
|
|
49
|
+
// raw audio file data
|
|
50
|
+
});
|
|
51
|
+
|
|
52
|
+
readable.on("close", () => {
|
|
53
|
+
console.log("STREAM CLOSED");
|
|
54
|
+
});
|
|
55
|
+
```
|
|
56
|
+
|
|
57
|
+
### Write to file
|
|
58
|
+
|
|
59
|
+
```js
|
|
60
|
+
import { MsEdgeTTS, OUTPUT_FORMAT } from "@bestcodes/edge-tts";
|
|
61
|
+
|
|
62
|
+
(async () => {
|
|
63
|
+
const tts = new MsEdgeTTS();
|
|
64
|
+
await tts.setMetadata(
|
|
65
|
+
"en-US-AriaNeural",
|
|
66
|
+
OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
|
|
67
|
+
);
|
|
68
|
+
const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
|
|
69
|
+
})();
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### Change voice rate, pitch and volume
|
|
73
|
+
|
|
74
|
+
```js
|
|
75
|
+
import { MsEdgeTTS, OUTPUT_FORMAT } from "@bestcodes/edge-tts";
|
|
76
|
+
|
|
77
|
+
(async () => {
|
|
78
|
+
const tts = new MsEdgeTTS();
|
|
79
|
+
await tts.setMetadata(
|
|
80
|
+
"en-US-AriaNeural",
|
|
81
|
+
OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
|
|
82
|
+
);
|
|
83
|
+
const filePath = await tts.toFile(
|
|
84
|
+
"./example_audio.webm",
|
|
85
|
+
"Hi, how are you?",
|
|
86
|
+
{ rate: 0.5, pitch: "+200Hz" }
|
|
87
|
+
);
|
|
88
|
+
})();
|
|
89
|
+
```
|
|
90
|
+
|
|
91
|
+
### Use an alternative HTTP Agent
|
|
92
|
+
|
|
93
|
+
Use a custom http.Agent implementation like [https-proxy-agent](https://github.com/TooTallNate/proxy-agents) or [socks-proxy-agent](https://github.com/TooTallNate/proxy-agents/tree/main/packages/socks-proxy-agent).
|
|
94
|
+
|
|
95
|
+
```js
|
|
96
|
+
import { MsEdgeTTS, OUTPUT_FORMAT } from "@bestcodes/edge-tts";
import { SocksProxyAgent } from "socks-proxy-agent";
|
|
97
|
+
|
|
98
|
+
(async () => {
|
|
99
|
+
const agent = new SocksProxyAgent(
|
|
100
|
+
"socks://your-name%40gmail.com:abcdef12345124@br41.nordvpn.com"
|
|
101
|
+
);
|
|
102
|
+
const tts = new MsEdgeTTS(agent);
|
|
103
|
+
await tts.setMetadata(
|
|
104
|
+
"en-US-AriaNeural",
|
|
105
|
+
OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
|
|
106
|
+
);
|
|
107
|
+
const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
|
|
108
|
+
})();
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
## API
|
|
112
|
+
|
|
113
|
+
For the full documentation check out the [API Documentation](https://migushthe2nd.github.io/MsEdgeTTS).
|
|
114
|
+
|
|
115
|
+
This library only supports promises.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"use strict";var __assign=this&&this.__assign||function(){return __assign=Object.assign||function(t){for(var e,n=1,o=arguments.length;n<o;n++)for(var a in e=arguments[n])Object.prototype.hasOwnProperty.call(e,a)&&(t[a]=e[a]);return t},__assign.apply(this,arguments)},__createBinding=this&&this.__createBinding||(Object.create?function(t,e,n,o){void 0===o&&(o=n);var a=Object.getOwnPropertyDescriptor(e,n);a&&!("get"in a?!e.__esModule:a.writable||a.configurable)||(a={enumerable:!0,get:function(){return e[n]}}),Object.defineProperty(t,o,a)}:function(t,e,n,o){void 0===o&&(o=n),t[o]=e[n]}),__setModuleDefault=this&&this.__setModuleDefault||(Object.create?function(t,e){Object.defineProperty(t,"default",{enumerable:!0,value:e})}:function(t,e){t.default=e}),__importStar=this&&this.__importStar||function(t){if(t&&t.__esModule)return t;var e={};if(null!=t)for(var n in t)"default"!==n&&Object.prototype.hasOwnProperty.call(t,n)&&__createBinding(e,t,n);return __setModuleDefault(e,t),e},__awaiter=this&&this.__awaiter||function(t,e,n,o){return new(n||(n=Promise))((function(a,r){function s(t){try{c(o.next(t))}catch(t){r(t)}}function i(t){try{c(o.throw(t))}catch(t){r(t)}}function c(t){var e;t.done?a(t.value):(e=t.value,e instanceof n?e:new n((function(t){t(e)}))).then(s,i)}c((o=o.apply(t,e||[])).next())}))},__generator=this&&this.__generator||function(t,e){var n,o,a,r,s={label:0,sent:function(){if(1&a[0])throw a[1];return a[1]},trys:[],ops:[]};return r={next:i(0),throw:i(1),return:i(2)},"function"==typeof Symbol&&(r[Symbol.iterator]=function(){return this}),r;function i(i){return function(c){return function(i){if(n)throw new TypeError("Generator is already executing.");for(;r&&(r=0,i[0]&&(s=0)),s;)try{if(n=1,o&&(a=2&i[0]?o.return:i[0]?o.throw||((a=o.return)&&a.call(o),0):o.next)&&!(a=a.call(o,i[1])).done)return a;switch(o=0,a&&(i=[2&i[0],a.value]),i[0]){case 0:case 1:a=i;break;case 4:return s.label++,{value:i[1],done:!1};case 5:s.label++,o=i[1],i=[0];continue;case 
7:i=s.ops.pop(),s.trys.pop();continue;default:if(!(a=s.trys,(a=a.length>0&&a[a.length-1])||6!==i[0]&&2!==i[0])){s=0;continue}if(3===i[0]&&(!a||i[1]>a[0]&&i[1]<a[3])){s.label=i[1];break}if(6===i[0]&&s.label<a[1]){s.label=a[1],a=i;break}if(a&&s.label<a[2]){s.label=a[2],s.ops.push(i);break}a[2]&&s.ops.pop(),s.trys.pop();continue}i=e.call(t,s)}catch(t){i=[6,t],o=0}finally{n=a=0}if(5&i[0])throw i[1];return{value:i[0]?i[1]:void 0,done:!0}}([i,c])}}},__importDefault=this&&this.__importDefault||function(t){return t&&t.__esModule?t:{default:t}};Object.defineProperty(exports,"__esModule",{value:!0}),exports.MsEdgeTTS=exports.MetadataOptions=exports.ProsodyOptions=void 0;var axios_1=__importDefault(require("axios")),isomorphic_ws_1=__importDefault(require("isomorphic-ws")),buffer_1=require("buffer/"),randombytes_1=__importDefault(require("randombytes")),OUTPUT_FORMAT_1=require("./OUTPUT_FORMAT"),stream_1=require("stream"),fs=__importStar(require("fs")),ProsodyOptions=function(){this.pitch="+0Hz",this.rate=1,this.volume=100};exports.ProsodyOptions=ProsodyOptions;var messageTypes,MetadataOptions=function(){this.sentenceBoundaryEnabled=!1,this.wordBoundaryEnabled=!1};exports.MetadataOptions=MetadataOptions,function(t){t.TURN_START="turn.start",t.TURN_END="turn.end",t.RESPONSE="response",t.SPEECH_CONFIG="speech.config",t.AUDIO_METADATA="audio.metadata",t.AUDIO="audio",t.SSML="ssml"}(messageTypes||(messageTypes={}));var MsEdgeTTS=function(){function t(t,e){void 0===e&&(e=!1),this._metadataOptions=new MetadataOptions,this._streams={},this._startTime=0,this._agent=t,this._enableLogger=e,this._isBrowser="undefined"!=typeof window&&void 0!==window.document}return t.prototype._log=function(){for(var t=[],e=0;e<arguments.length;e++)t[e]=arguments[e];this._enableLogger&&console.log.apply(console,t)},t.prototype._send=function(t){return __awaiter(this,void 0,void 0,(function(){var e,n=this;return __generator(this,(function(o){switch(o.label){case 0:e=1,o.label=1;case 1:return 
e<=3&&this._ws.readyState!==this._ws.OPEN?(1==e&&(this._startTime=Date.now()),this._log("connecting: ",e),[4,this._initClient()]):[3,4];case 2:o.sent(),o.label=3;case 3:return e++,[3,1];case 4:return this._ws.send(t,(function(){n._log("<-",t)})),[2]}}))}))},t.prototype._initClient=function(){var e=this;return this._ws=this._isBrowser?new isomorphic_ws_1.default(t.SYNTH_URL):new isomorphic_ws_1.default(t.SYNTH_URL,{agent:this._agent}),this._ws.binaryType="arraybuffer",new Promise((function(n,o){e._ws.onopen=function(){e._log("Connected in",(Date.now()-e._startTime)/1e3,"seconds"),e._send("Content-Type:application/json; charset=utf-8\r\nPath:".concat(messageTypes.SPEECH_CONFIG).concat(t.JSON_XML_DELIM,'\n {\n "context": {\n "synthesis": {\n "audio": {\n "metadataoptions": {\n "sentenceBoundaryEnabled": "').concat(e._metadataOptions.sentenceBoundaryEnabled,'",\n "wordBoundaryEnabled": "').concat(e._metadataOptions.wordBoundaryEnabled,'"\n },\n "outputFormat": "').concat(e._outputFormat,'" \n }\n }\n }\n }\n ')).then(n)},e._ws.onmessage=function(n){var o,a=buffer_1.Buffer.from(n.data),r=a.toString(),s=(null===(o=/X-RequestId:(.*?)\r\n/gm.exec(r))||void 0===o?void 0:o[1])||"";if(r.includes("Path:".concat(messageTypes.TURN_START)))e._log("->",r);else if(r.includes("Path:".concat(messageTypes.TURN_END)))e._log("->",r),e._streams[s].audio.push(null);else if(r.includes("Path:".concat(messageTypes.RESPONSE)))e._log("->",r);else if(r.includes("Path:".concat(messageTypes.AUDIO_METADATA))){var i=a.indexOf(t.JSON_XML_DELIM)+t.JSON_XML_DELIM.length,c=a.subarray(i);e._log("->",r),e._pushMetadata(c,s)}else if(r.includes("Path:".concat(messageTypes.AUDIO))&&n.data instanceof ArrayBuffer){i=a.indexOf(t.AUDIO_DELIM)+t.AUDIO_DELIM.length;var u=a.subarray(0,i).toString();c=a.subarray(i);e._log("->",u),e._pushAudioData(c,s)}else e._log("->","UNKNOWN MESSAGE",r)},e._ws.onclose=function(){for(var t in e._log("disconnected 
after:",(Date.now()-e._startTime)/1e3,"seconds"),e._streams)e._streams[t].audio.push(null)},e._ws.onerror=function(t){o("Connect Error: "+JSON.stringify(t,null,2))}}))},t.prototype._pushAudioData=function(t,e){this._streams[e].audio.push(t)},t.prototype._pushMetadata=function(t,e){this._streams[e].metadata.push(t)},t.prototype._SSMLTemplate=function(t,e){return void 0===e&&(e={}),e=__assign(__assign({},new ProsodyOptions),e),'<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="'.concat(this._metadataOptions.voiceLocale,'">\n <voice name="').concat(this._voice,'">\n <prosody pitch="').concat(e.pitch,'" rate="').concat(e.rate,'" volume="').concat(e.volume,'">\n ').concat(t,"\n </prosody> \n </voice>\n </speak>")},t.prototype.getVoices=function(){return new Promise((function(e,n){axios_1.default.get(t.VOICES_URL).then((function(t){return e(t.data)})).catch(n)}))},t.prototype.setMetadata=function(e,n,o){return __awaiter(this,void 0,void 0,(function(){var a,r,s,i;return __generator(this,(function(c){switch(c.label){case 0:if(a=this._voice,r=this._outputFormat,s=JSON.stringify(this._metadataOptions),this._voice=e,!this._metadataOptions.voiceLocale||o&&!o.voiceLocale&&a!==this._voice){if(!(i=t.VOICE_LANG_REGEX.exec(this._voice)))throw new Error("Could not infer voiceLocale from voiceName, and no voiceLocale was specified!");this._metadataOptions.voiceLocale=i[0]}return this._outputFormat=n,Object.assign(this._metadataOptions,o),a!==this._voice||r!==this._outputFormat||s!==JSON.stringify(this._metadataOptions)||this._ws.readyState!==this._ws.OPEN?(this._startTime=Date.now(),[4,this._initClient()]):[2];case 1:return c.sent(),[2]}}))}))},t.prototype._metadataCheck=function(){if(!this._ws)throw new Error("Speech synthesis not configured yet. 
Run setMetadata before calling toStream or toFile.")},t.prototype.close=function(){this._ws.close()},t.prototype.toFile=function(t,e,n){return this._rawSSMLRequestToFile(t,this._SSMLTemplate(e,n))},t.prototype.toStream=function(t,e){return this._rawSSMLRequest(this._SSMLTemplate(t,e)).audioStream},t.prototype.rawToFile=function(t,e){return this._rawSSMLRequestToFile(t,e)},t.prototype.rawToStream=function(t){return this._rawSSMLRequest(t).audioStream},t.prototype._rawSSMLRequestToFile=function(t,e){return __awaiter(this,void 0,void 0,(function(){var n,o,a,r,s,i,c,u=this;return __generator(this,(function(_){switch(_.label){case 0:n=this._rawSSMLRequest(e),o=n.audioStream,a=n.metadataStream,n.requestId,_.label=1;case 1:return _.trys.push([1,3,,4]),[4,Promise.all([new Promise((function(e,n){var r=o.pipe(fs.createWriteStream(t+"/example_audio.webm"));r.once("close",(function(){return __awaiter(u,void 0,void 0,(function(){return __generator(this,(function(o){return r.bytesWritten>0?e(t+"/example_audio.webm"):n("No audio data received"),[2]}))}))})),a.once("error",n)})),new Promise((function(e,n){var o=[];a.on("data",(function(t){var e=JSON.parse(t.toString());o.push.apply(o,e.Metadata)})),a.on("close",(function(){var n=t+"/example_metadata.json";fs.writeFileSync(n,JSON.stringify(o,null,2)),e(n)})),a.once("error",n)}))])];case 2:return r=_.sent(),s=r[0],i=r[1],[2,{audioFilePath:s,metadataFilePath:i}];case 3:throw c=_.sent(),o.destroy(),a.destroy(),c;case 4:return[2]}}))}))},t.prototype._rawSSMLRequest=function(e){this._metadataCheck();var n=(0,randombytes_1.default)(16).toString("hex"),o="X-RequestId:".concat(n,"\r\nContent-Type:application/ssml+xml\r\nPath:").concat(messageTypes.SSML).concat(t.JSON_XML_DELIM)+e.trim(),a=this,r=new stream_1.Readable({read:function(){},destroy:function(t,e){delete a._streams[n],e(t)}}),s=new stream_1.Readable({read:function(){}});return 
r.on("error",(function(t){r.destroy(),s.destroy()})),r.once("close",(function(){r.destroy(),s.destroy()})),this._streams[n]={audio:r,metadata:s},this._send(o).then(),{audioStream:r,metadataStream:s,requestId:n}},t.OUTPUT_FORMAT=OUTPUT_FORMAT_1.OUTPUT_FORMAT,t.TRUSTED_CLIENT_TOKEN="6A5AA1D4EAFF4E9FB37E23D68491D6F4",t.VOICES_URL="https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=".concat(t.TRUSTED_CLIENT_TOKEN),t.SYNTH_URL="wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=".concat(t.TRUSTED_CLIENT_TOKEN),t.JSON_XML_DELIM="\r\n\r\n",t.AUDIO_DELIM="Path:audio\r\n",t.VOICE_LANG_REGEX=/\w{2}-\w{2}/,t}();exports.MsEdgeTTS=MsEdgeTTS;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"use strict";var OUTPUT_FORMAT;Object.defineProperty(exports,"__esModule",{value:!0}),exports.OUTPUT_FORMAT=void 0,function(O){O.AUDIO_24KHZ_48KBITRATE_MONO_MP3="audio-24khz-48kbitrate-mono-mp3",O.AUDIO_24KHZ_96KBITRATE_MONO_MP3="audio-24khz-96kbitrate-mono-mp3",O.WEBM_24KHZ_16BIT_MONO_OPUS="webm-24khz-16bit-mono-opus"}(OUTPUT_FORMAT||(exports.OUTPUT_FORMAT=OUTPUT_FORMAT={}));
|
package/dist/PITCH.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"use strict";var PITCH;Object.defineProperty(exports,"__esModule",{value:!0}),exports.PITCH=void 0,function(e){e.X_LOW="x-low",e.LOW="low",e.MEDIUM="medium",e.HIGH="high",e.X_HIGH="x-high",e.DEFAULT="default"}(PITCH||(exports.PITCH=PITCH={}));
|
package/dist/RATE.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"use strict";var RATE;Object.defineProperty(exports,"__esModule",{value:!0}),exports.RATE=void 0,function(e){e.X_SLOW="x-slow",e.SLOW="slow",e.MEDIUM="medium",e.FAST="fast",e.X_FAST="x-fast",e.DEFAULT="default"}(RATE||(exports.RATE=RATE={}));
|
package/dist/VOLUME.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"use strict";var VOLUME;Object.defineProperty(exports,"__esModule",{value:!0}),exports.VOLUME=void 0,function(e){e.SILENT="silent",e.X_SOFT="x-soft",e.SOFT="soft",e.MEDIUM="medium",e.LOUD="loud",e.X_LOUD="x-LOUD",e.DEFAULT="default"}(VOLUME||(exports.VOLUME=VOLUME={}));
|
package/dist/index.js
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.VOLUME=exports.RATE=exports.PITCH=exports.OUTPUT_FORMAT=exports.ProsodyOptions=exports.MsEdgeTTS=void 0;var MsEdgeTTS_1=require("./MsEdgeTTS");Object.defineProperty(exports,"MsEdgeTTS",{enumerable:!0,get:function(){return MsEdgeTTS_1.MsEdgeTTS}}),Object.defineProperty(exports,"ProsodyOptions",{enumerable:!0,get:function(){return MsEdgeTTS_1.ProsodyOptions}});var OUTPUT_FORMAT_1=require("./OUTPUT_FORMAT");Object.defineProperty(exports,"OUTPUT_FORMAT",{enumerable:!0,get:function(){return OUTPUT_FORMAT_1.OUTPUT_FORMAT}});var PITCH_1=require("./PITCH");Object.defineProperty(exports,"PITCH",{enumerable:!0,get:function(){return PITCH_1.PITCH}});var RATE_1=require("./RATE");Object.defineProperty(exports,"RATE",{enumerable:!0,get:function(){return RATE_1.RATE}});var VOLUME_1=require("./VOLUME");Object.defineProperty(exports,"VOLUME",{enumerable:!0,get:function(){return VOLUME_1.VOLUME}});
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import { OUTPUT_FORMAT } from "./OUTPUT_FORMAT";
|
|
2
|
+
import { Readable } from "stream";
|
|
3
|
+
import { Agent } from "http";
|
|
4
|
+
import { PITCH } from "./PITCH";
|
|
5
|
+
import { RATE } from "./RATE";
|
|
6
|
+
import { VOLUME } from "./VOLUME";
|
|
7
|
+
export type Voice = {
|
|
8
|
+
Name: string;
|
|
9
|
+
ShortName: string;
|
|
10
|
+
Gender: string;
|
|
11
|
+
Locale: string;
|
|
12
|
+
SuggestedCodec: string;
|
|
13
|
+
FriendlyName: string;
|
|
14
|
+
Status: string;
|
|
15
|
+
};
|
|
16
|
+
export declare class ProsodyOptions {
|
|
17
|
+
/**
|
|
18
|
+
* The pitch to use.
|
|
19
|
+
* Can be any {@link PITCH}, or a relative frequency in Hz (+50Hz), a relative semitone (+2st), or a relative percentage (+50%).
|
|
20
|
+
* [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,pitch,-Indicates%20the%20baseline)
|
|
21
|
+
*/
|
|
22
|
+
pitch?: PITCH | string;
|
|
23
|
+
/**
|
|
24
|
+
* The rate to use.
|
|
25
|
+
* Can be any {@link RATE}, or a relative number (0.5), or string with a relative percentage (+50%).
|
|
26
|
+
* [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,rate,-Indicates%20the%20speaking)
|
|
27
|
+
*/
|
|
28
|
+
rate?: RATE | string | number;
|
|
29
|
+
/**
|
|
30
|
+
* The volume to use.
|
|
31
|
+
* Can be any {@link VOLUME}, or an absolute number (0, 100), a string with a relative number (+50), or a relative percentage (+50%).
|
|
32
|
+
* [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,volume,-Indicates%20the%20volume)
|
|
33
|
+
*/
|
|
34
|
+
volume?: VOLUME | string | number;
|
|
35
|
+
}
|
|
36
|
+
export declare class MetadataOptions {
|
|
37
|
+
/**
|
|
38
|
+
* (optional) any voice locale that is supported by the voice. See the list of all voices for compatibility. If not provided, the locale will be inferred from the `voiceName`.
|
|
39
|
+
* Changing the voiceName will reset the voiceLocale.
|
|
40
|
+
*/
|
|
41
|
+
voiceLocale?: string;
|
|
42
|
+
/**
|
|
43
|
+
* (optional) whether to enable sentence boundary metadata. Default is `false`
|
|
44
|
+
*/
|
|
45
|
+
sentenceBoundaryEnabled?: boolean;
|
|
46
|
+
/**
|
|
47
|
+
* (optional) whether to enable word boundary metadata. Default is `false`
|
|
48
|
+
*/
|
|
49
|
+
wordBoundaryEnabled?: boolean;
|
|
50
|
+
}
|
|
51
|
+
export declare class MsEdgeTTS {
|
|
52
|
+
static OUTPUT_FORMAT: typeof OUTPUT_FORMAT;
|
|
53
|
+
private static TRUSTED_CLIENT_TOKEN;
|
|
54
|
+
private static VOICES_URL;
|
|
55
|
+
private static SYNTH_URL;
|
|
56
|
+
private static JSON_XML_DELIM;
|
|
57
|
+
private static AUDIO_DELIM;
|
|
58
|
+
private static VOICE_LANG_REGEX;
|
|
59
|
+
private readonly _enableLogger;
|
|
60
|
+
private readonly _isBrowser;
|
|
61
|
+
private _ws;
|
|
62
|
+
private _voice;
|
|
63
|
+
private _outputFormat;
|
|
64
|
+
private _metadataOptions;
|
|
65
|
+
private _streams;
|
|
66
|
+
private _startTime;
|
|
67
|
+
private readonly _agent;
|
|
68
|
+
private _log;
|
|
69
|
+
/**
|
|
70
|
+
* Create a new `MsEdgeTTS` instance.
|
|
71
|
+
*
|
|
72
|
+
* @param agent (optional, **NOT SUPPORTED IN BROWSER**) Use a custom http.Agent implementation like [https-proxy-agent](https://github.com/TooTallNate/proxy-agents) or [socks-proxy-agent](https://github.com/TooTallNate/proxy-agents/tree/main/packages/socks-proxy-agent).
|
|
73
|
+
* @param enableLogger=false whether to enable the built-in logger. This logs connections inits, disconnects, and incoming data to the console
|
|
74
|
+
*/
|
|
75
|
+
constructor(agent?: Agent, enableLogger?: boolean);
|
|
76
|
+
private _send;
|
|
77
|
+
private _initClient;
|
|
78
|
+
private _pushAudioData;
|
|
79
|
+
private _pushMetadata;
|
|
80
|
+
private _SSMLTemplate;
|
|
81
|
+
/**
|
|
82
|
+
* Fetch the list of voices available in Microsoft Edge.
|
|
83
|
+
* These, however, are not all. The complete list of voices supported by this module [can be found here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support) (neural, standard, and preview).
|
|
84
|
+
*/
|
|
85
|
+
getVoices(): Promise<Voice[]>;
|
|
86
|
+
/**
|
|
87
|
+
* Sets the required information for the speech to be synthesised and inits a new WebSocket connection.
|
|
88
|
+
* Must be called at least once before text can be synthesised.
|
|
89
|
+
* Saved in this instance. Can be called at any time times to update the metadata.
|
|
90
|
+
*
|
|
91
|
+
* @param voiceName a string with any `ShortName`. A list of all available neural voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#neural-voices). However, it is not limited to neural voices: standard voices can also be used. A list of standard voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#standard-voices). Changing the voiceName will reset the voiceLocale.
|
|
92
|
+
* @param outputFormat any {@link OUTPUT_FORMAT}
|
|
93
|
+
* @param metadataOptions (optional) {@link MetadataOptions}
|
|
94
|
+
*/
|
|
95
|
+
setMetadata(voiceName: string, outputFormat: OUTPUT_FORMAT, metadataOptions?: MetadataOptions): Promise<void>;
|
|
96
|
+
private _metadataCheck;
|
|
97
|
+
/**
|
|
98
|
+
* Close the WebSocket connection.
|
|
99
|
+
*/
|
|
100
|
+
close(): void;
|
|
101
|
+
/**
|
|
102
|
+
* Writes raw audio synthesised from text to a file. Uses a basic {@link _SSMLTemplate SML template}.
|
|
103
|
+
*
|
|
104
|
+
* @param dirPath a valid output directory path
|
|
105
|
+
* @param input the input to synthesise
|
|
106
|
+
* @param options (optional) {@link ProsodyOptions}
|
|
107
|
+
@returns {Promise<{audioFilePath: string, metadataFilePath: string}>} - a `Promise` with the full filepaths
|
|
108
|
+
*/
|
|
109
|
+
toFile(dirPath: string, input: string, options?: ProsodyOptions): Promise<{
|
|
110
|
+
audioFilePath: string;
|
|
111
|
+
metadataFilePath: string;
|
|
112
|
+
}>;
|
|
113
|
+
/**
|
|
114
|
+
* Writes raw audio synthesised from text in real-time to a {@link Readable}. Uses a basic {@link _SSMLTemplate SML template}.
|
|
115
|
+
*
|
|
116
|
+
* @param input the text to synthesise. Can include SSML elements.
|
|
117
|
+
* @param options (optional) {@link ProsodyOptions}
|
|
118
|
+
* @returns {Readable} - a `stream.Readable` with the audio data
|
|
119
|
+
*/
|
|
120
|
+
toStream(input: string, options?: ProsodyOptions): Readable;
|
|
121
|
+
/**
|
|
122
|
+
* Writes raw audio synthesised from text to a file. Has no SSML template. Basic SSML should be provided in the request.
|
|
123
|
+
*
|
|
124
|
+
* @param dirPath a valid output directory path.
|
|
125
|
+
* @param requestSSML the SSML to send. SSML elements required in order to work.
|
|
126
|
+
* @returns {Promise<{audioFilePath: string, metadataFilePath: string}>} - a `Promise` with the full filepaths
|
|
127
|
+
*/
|
|
128
|
+
rawToFile(dirPath: string, requestSSML: string): Promise<{
|
|
129
|
+
audioFilePath: string;
|
|
130
|
+
metadataFilePath: string;
|
|
131
|
+
}>;
|
|
132
|
+
/**
|
|
133
|
+
* Writes raw audio synthesised from a request in real-time to a {@link Readable}. Has no SSML template. Basic SSML should be provided in the request.
|
|
134
|
+
*
|
|
135
|
+
* @param requestSSML the SSML to send. SSML elements required in order to work.
|
|
136
|
+
* @returns {Readable} - a `stream.Readable` with the audio data
|
|
137
|
+
*/
|
|
138
|
+
rawToStream(requestSSML: string): Readable;
|
|
139
|
+
private _rawSSMLRequestToFile;
|
|
140
|
+
private _rawSSMLRequest;
|
|
141
|
+
}
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Only a few of the [possible formats](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech#audio-outputs) are accepted.
|
|
3
|
+
*/
|
|
4
|
+
export declare enum OUTPUT_FORMAT {
|
|
5
|
+
AUDIO_24KHZ_48KBITRATE_MONO_MP3 = "audio-24khz-48kbitrate-mono-mp3",
|
|
6
|
+
AUDIO_24KHZ_96KBITRATE_MONO_MP3 = "audio-24khz-96kbitrate-mono-mp3",
|
|
7
|
+
WEBM_24KHZ_16BIT_MONO_OPUS = "webm-24khz-16bit-mono-opus"
|
|
8
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,pitch,-Indicates%20the%20baseline
|
|
3
|
+
*/
|
|
4
|
+
export declare enum PITCH {
|
|
5
|
+
X_LOW = "x-low",
|
|
6
|
+
LOW = "low",
|
|
7
|
+
MEDIUM = "medium",
|
|
8
|
+
HIGH = "high",
|
|
9
|
+
X_HIGH = "x-high",
|
|
10
|
+
DEFAULT = "default"
|
|
11
|
+
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,rate,-Indicates%20the%20speaking
|
|
3
|
+
*/
|
|
4
|
+
export declare enum RATE {
|
|
5
|
+
X_SLOW = "x-slow",
|
|
6
|
+
SLOW = "slow",
|
|
7
|
+
MEDIUM = "medium",
|
|
8
|
+
FAST = "fast",
|
|
9
|
+
X_FAST = "x-fast",
|
|
10
|
+
DEFAULT = "default"
|
|
11
|
+
}
|
|
@@ -0,0 +1,12 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,volume,-Indicates%20the%20volume
|
|
3
|
+
*/
|
|
4
|
+
export declare enum VOLUME {
|
|
5
|
+
SILENT = "silent",
|
|
6
|
+
X_SOFT = "x-soft",
|
|
7
|
+
SOFT = "soft",
|
|
8
|
+
MEDIUM = "medium",
|
|
9
|
+
LOUD = "loud",
|
|
10
|
+
X_LOUD = "x-LOUD",
|
|
11
|
+
DEFAULT = "default"
|
|
12
|
+
}
|
package/package.json
ADDED
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
{
  "name": "@bestcodes/edge-tts",
  "version": "1.0.1",
  "description": "An Azure Speech Service module that uses the Microsoft Edge Read Aloud API.",
  "author": "The-Best-Codes",
  "license": "MIT",
  "repository": {
    "type": "git",
    "url": "https://github.com/The-Best-Codes/edge-tts.git"
  },
  "bugs": {
    "url": "https://github.com/The-Best-Codes/edge-tts/issues"
  },
  "homepage": "https://github.com/The-Best-Codes/edge-tts#readme",
  "keywords": [
    "tts",
    "text-to-speech",
    "speech",
    "text",
    "voice",
    "ssml",
    "azure",
    "speech-synthesis",
    "readaloud",
    "edge",
    "microsoft"
  ],
  "module": "./dist/index",
  "main": "./dist/index",
  "scripts": {
    "minify": "find dist/ -name '*.js' -type f -exec terser {} --compress --mangle --output {} \\;",
    "build": "tsc --project tsconfig.json --outDir dist && npm run minify",
    "prepublishOnly": "npm run build",
    "depclean": "sh depclean.sh",
    "update": "npm update --save && npm update --save-dev && npx npm-check-updates@latest -u && npm install",
    "maintain": "npm run depclean && npm run update"
  },
  "dependencies": {
    "axios": "^1.7.7",
    "buffer": "^6.0.3",
    "isomorphic-ws": "^5.0.0",
    "randombytes": "^2.1.0"
  },
  "devDependencies": {
    "@types/node": "^22.8.1",
    "@types/randombytes": "^2.0.3",
    "terser": "^5.36.0"
  },
  "files": [
    "dist/"
  ]
}
|