@bestcodes/edge-tts 1.0.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,115 +1,108 @@
1
1
  # @bestcodes/edge-tts
2
2
 
3
- [![npm version](https://badge.fury.io/js/%40bestcodes%2Fedge-tts.svg)](https://badge.fury.io/js/%40bestcodes%2Fedge-tts)
3
+ A TypeScript port of [edge-tts](https://github.com/rany2/edge-tts/). Small, fast, and easy text-to-speech using Microsoft Edge's online service.
4
4
 
5
- ## This is a fork.
5
+ - Tiny bundle size (< 50kb)
6
+ - No API keys required
7
+ - Stream audio or save to file
8
+ - Generate SRT subtitles
9
+ - Access to all available Edge voices
6
10
 
7
- The original version, here:
8
- https://github.com/Migushthe2nd/MsEdgeTTS
9
- Was undermaintained and had dependency issues, so I forked the repo and fixed them.
11
+ ## Installation
10
12
 
11
- An simple Azure Speech Service module that uses the Microsoft Edge Read Aloud API.
12
-
13
- Only supports `speak`, `voice`, and `prosody` element types. The following is the default SSML object:
14
-
15
- ```xml
16
- <speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts"
17
- xml:lang="${this._voiceLang}">
18
- <voice name="${voiceName}">
19
- <prosody rate="${rate}" pitch="${pitch}" volume="${volume}">
20
- ${input}
21
- </prosody>
22
- </voice>
23
- </speak>
13
+ ```bash
14
+ npm install @bestcodes/edge-tts
15
+ # or
16
+ bun add @bestcodes/edge-tts
24
17
  ```
25
18
 
26
- Documentation on the SSML
27
- format [can be found here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/speech-synthesis-markup)
28
- . All supported audio formats [can be found here](./src/OUTPUT_FORMAT.ts).
19
+ ## Quick Start
29
20
 
30
- ## Example usage
21
+ ### Get audio buffer
31
22
 
32
- Make sure to **escape/sanitize** your user's input!
33
- Use a library like [xml-escape](https://www.npmjs.com/package/xml-escape).
23
+ ```ts
24
+ import { streamSpeech } from "@bestcodes/edge-tts";
34
25
 
35
- ### Write to stream
26
+ const audio = await streamSpeech({
27
+ text: "Hello, world!",
28
+ voice: "en-US-EmmaMultilingualNeural",
29
+ });
36
30
 
37
- ```js
38
- import { MsEdgeTTS, OUTPUT_FORMAT } from "msedge-tts";
31
+ // Do something with the audio buffer
32
+ ```
39
33
 
40
- const tts = new MsEdgeTTS();
41
- await tts.setMetadata(
42
- "en-IE-ConnorNeural",
43
- OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
44
- );
45
- const readable = tts.toStream("Hi, how are you?");
34
+ ### Save to file
46
35
 
47
- readable.on("data", (data) => {
48
- console.log("DATA RECEIVED", data);
49
- // raw audio file data
50
- });
36
+ ```ts
37
+ import { streamSpeechToFile } from "@bestcodes/edge-tts";
51
38
 
52
- readable.on("close", () => {
53
- console.log("STREAM CLOSED");
39
+ await streamSpeechToFile({
40
+ text: "Hello, world!",
41
+ outputPath: "./output.mp3",
54
42
  });
55
43
  ```
56
44
 
57
- ### Write to file
45
+ ### With subtitles
58
46
 
59
- ```js
60
- import { MsEdgeTTS, OUTPUT_FORMAT } from "msedge-tts";
47
+ ```ts
48
+ import { streamSpeechWithSubtitles } from "@bestcodes/edge-tts";
61
49
 
62
- (async () => {
63
- const tts = new MsEdgeTTS();
64
- await tts.setMetadata(
65
- "en-US-AriaNeural",
66
- OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
67
- );
68
- const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
69
- })();
50
+ const { audio, subtitles } = await streamSpeechWithSubtitles({
51
+ text: "This text will have subtitles.",
52
+ subtitlePath: "./subtitles.srt",
53
+ });
70
54
  ```
71
55
 
72
- ### Change voice rate, pitch and volume
73
-
74
- ```js
75
- import { MsEdgeTTS, OUTPUT_FORMAT } from "msedge-tts";
76
-
77
- (async () => {
78
- const tts = new MsEdgeTTS();
79
- await tts.setMetadata(
80
- "en-US-AriaNeural",
81
- OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
82
- );
83
- const filePath = await tts.toFile(
84
- "./example_audio.webm",
85
- "Hi, how are you?",
86
- { rate: 0.5, pitch: "+200Hz" }
87
- );
88
- })();
56
+ ## Options
57
+
58
+ ```ts
59
+ {
60
+ text: string; // Required: text to convert
61
+ voice?: string; // Default: "en-US-EmmaMultilingualNeural"
62
+ rate?: string; // e.g. "+10%" or "-20%" (default: "+0%")
63
+ volume?: string; // e.g. "+50%" or "-10%" (default: "+0%")
64
+ pitch?: string; // e.g. "+10Hz" or "-5Hz" (default: "+0Hz")
65
+ boundary?: "WordBoundary" | "SentenceBoundary"; // Default: "SentenceBoundary" ("WordBoundary" for streamSpeechWithSubtitles)
66
+ proxy?: string; // Optional proxy URL
67
+ connectTimeoutSeconds?: number; // Default: 10
68
+ receiveTimeoutSeconds?: number; // Default: 60
69
+ outputPath?: string; // streamSpeechToFile only (required there)
70
+ subtitlePath?: string; // streamSpeechWithSubtitles only (required there)
71
+ }
89
72
  ```
90
73
 
91
- ### Use an alternative HTTP Agent
74
+ ## List voices
92
75
 
93
- Use a custom http.Agent implementation like [https-proxy-agent](https://github.com/TooTallNate/proxy-agents) or [socks-proxy-agent](https://github.com/TooTallNate/proxy-agents/tree/main/packages/socks-proxy-agent).
76
+ ```ts
77
+ import { getVoices, findVoices } from "@bestcodes/edge-tts";
94
78
 
95
- ```js
96
- import { SocksProxyAgent } from "socks-proxy-agent";
79
+ // Get all voices
80
+ const allVoices = await getVoices();
97
81
 
98
- (async () => {
99
- const agent = new SocksProxyAgent(
100
- "socks://your-name%40gmail.com:abcdef12345124@br41.nordvpn.com"
101
- );
102
- const tts = new MsEdgeTTS(agent);
103
- await tts.setMetadata(
104
- "en-US-AriaNeural",
105
- OUTPUT_FORMAT.WEBM_24KHZ_16BIT_MONO_OPUS
106
- );
107
- const filePath = await tts.toFile("./example_audio.webm", "Hi, how are you?");
108
- })();
82
+ // Find specific voices
83
+ const femaleVoices = await findVoices({ Gender: "Female" });
84
+ const englishVoices = await findVoices({ Locale: "en-US" });
109
85
  ```
110
86
 
111
- ## API
87
+ ## Low-level API
88
+
89
+ ```ts
90
+ import { Raw } from "@bestcodes/edge-tts";
91
+
92
+ const communicate = new Raw.Communicate(
93
+ "Hello!",
94
+ "en-US-EmmaMultilingualNeural",
95
+ );
96
+
97
+ for await (const chunk of communicate.stream()) {
98
+ if (chunk.type === "audio") {
99
+ // Process audio data (chunk.data is a Buffer)
100
+ } else if (chunk.type === "WordBoundary") {
101
+ // Word boundary metadata
102
+ }
103
+ }
104
+ ```
112
105
 
113
- For the full documentation check out the [API Documentation](https://migushthe2nd.github.io/MsEdgeTTS).
106
+ ## License
114
107
 
115
- This library only supports promises.
108
+ MIT
@@ -0,0 +1,187 @@
1
+ // Generated by dts-bundle-generator v9.5.1
2
+
3
+ export type ChunkType = "audio" | "WordBoundary" | "SentenceBoundary";
4
+ export interface TTSChunk {
5
+ type: ChunkType;
6
+ data?: Buffer;
7
+ duration?: number;
8
+ offset?: number;
9
+ text?: string;
10
+ }
11
+ export interface VoiceTag {
12
+ ContentCategories: string[];
13
+ VoicePersonalities: string[];
14
+ }
15
+ export interface Voice {
16
+ Name: string;
17
+ ShortName: string;
18
+ Gender: "Female" | "Male";
19
+ Locale: string;
20
+ SuggestedCodec: string;
21
+ FriendlyName: string;
22
+ Status: "Deprecated" | "GA" | "Preview";
23
+ VoiceTag: VoiceTag;
24
+ }
25
+ export interface VoicesManagerVoice extends Voice {
26
+ Language: string;
27
+ }
28
+ export interface VoicesManagerFindOptions {
29
+ Gender?: "Female" | "Male";
30
+ Locale?: string;
31
+ Language?: string;
32
+ ShortName?: string;
33
+ }
34
+ export interface CommunicateOptions {
35
+ rate?: string;
36
+ volume?: string;
37
+ pitch?: string;
38
+ boundary?: "WordBoundary" | "SentenceBoundary";
39
+ proxy?: string;
40
+ connectTimeoutSeconds?: number;
41
+ receiveTimeoutSeconds?: number;
42
+ }
43
+ declare class Communicate {
44
+ private ttsConfig;
45
+ private texts;
46
+ private options;
47
+ private state;
48
+ constructor(text: string, voice?: string, options?: CommunicateOptions);
49
+ private parseMetadata;
50
+ private streamInternal;
51
+ stream(): AsyncGenerator<TTSChunk>;
52
+ }
53
+ declare class DRM {
54
+ private static clockSkewSeconds;
55
+ static adjustClockSkewSeconds(skewSeconds: number): void;
56
+ static getUnixTimestamp(): number;
57
+ static parseRFC2616Date(dateStr: string): number | null;
58
+ static handleClientResponseError(headers: Record<string, any>): void;
59
+ static generateSecMsGec(): string;
60
+ static generateMuid(): string;
61
+ static headersWithMuid(headers: Record<string, string>): Record<string, string>;
62
+ }
63
+ declare class SubMaker {
64
+ private cues;
65
+ private type;
66
+ feed(msg: TTSChunk): void;
67
+ getSrt(): string;
68
+ }
69
+ declare class TTSConfig {
70
+ voice: string;
71
+ rate: string;
72
+ volume: string;
73
+ pitch: string;
74
+ boundary: "WordBoundary" | "SentenceBoundary";
75
+ constructor(voice: string, rate?: string, volume?: string, pitch?: string, boundary?: "WordBoundary" | "SentenceBoundary");
76
+ private validateStringParam;
77
+ private validate;
78
+ toSSML(escapedText: string): string;
79
+ }
80
+ declare function connectId(): string;
81
+ declare function escapeXml(unsafe: string): string;
82
+ declare function unescapeXml(safe: string): string;
83
+ declare function dateToString(): string;
84
+ declare function removeIncompatibleCharacters(input: string | Buffer): string;
85
+ declare function splitTextByByteLength(text: string | Buffer, byteLength: number): Generator<Buffer>;
86
+ declare function getHeadersAndData(data: Buffer, headerLength: number): {
87
+ headers: Record<string, string>;
88
+ data: Buffer;
89
+ };
90
+ declare function listVoices(proxy?: string): Promise<Voice[]>;
91
+ declare class VoicesManager {
92
+ voices: VoicesManagerVoice[];
93
+ private createCalled;
94
+ static create(customVoices?: Voice[]): Promise<VoicesManager>;
95
+ find(options: VoicesManagerFindOptions): VoicesManagerVoice[];
96
+ }
97
+ export declare const Raw: {
98
+ Communicate: typeof Communicate;
99
+ SubMaker: typeof SubMaker;
100
+ VoicesManager: typeof VoicesManager;
101
+ listVoices: typeof listVoices;
102
+ DRM: typeof DRM;
103
+ TTSConfig: typeof TTSConfig;
104
+ utils: typeof utils;
105
+ };
106
+ /**
107
+ * Options for streaming speech.
108
+ */
109
+ export interface StreamSpeechOptions {
110
+ /** The text to convert to speech */
111
+ text: string;
112
+ /** Voice to use for synthesis (default: "en-US-EmmaMultilingualNeural") */
113
+ voice?: string;
114
+ /** Speaking rate in percentage, e.g., "+10%" or "-20%" (default: "+0%") */
115
+ rate?: string;
116
+ /** Volume in percentage, e.g., "+50%" or "-10%" (default: "+0%") */
117
+ volume?: string;
118
+ /** Pitch in hertz, e.g., "+10Hz" or "-5Hz" (default: "+0Hz") */
119
+ pitch?: string;
120
+ /** Boundary type for metadata: "WordBoundary" or "SentenceBoundary" (default: "SentenceBoundary"; streamSpeechWithSubtitles defaults to "WordBoundary") */
121
+ boundary?: "WordBoundary" | "SentenceBoundary";
122
+ /** Proxy URL for the WebSocket connection */
123
+ proxy?: string;
124
+ /** Connection timeout in seconds (default: 10) */
125
+ connectTimeoutSeconds?: number;
126
+ /** Receive timeout in seconds (default: 60) */
127
+ receiveTimeoutSeconds?: number;
128
+ }
129
+ /**
130
+ * Options for generating speech and writing to a file.
131
+ */
132
+ export interface GenerateSpeechOptions extends StreamSpeechOptions {
133
+ /** Output file path for the generated audio */
134
+ outputPath: string;
135
+ }
136
+ /**
137
+ * Options for streaming speech with subtitles.
138
+ */
139
+ export interface StreamSpeechWithSubtitlesOptions extends StreamSpeechOptions {
140
+ /** Output file path for the generated subtitles (SRT format) */
141
+ subtitlePath: string;
142
+ }
143
+ /**
144
+ * Stream speech audio from text.
145
+ * @param options - Configuration options for speech synthesis
146
+ * @returns Promise resolving to the audio buffer
147
+ */
148
+ export declare function streamSpeech(options: StreamSpeechOptions): Promise<Buffer>;
149
+ /**
150
+ * Generate speech from text and save it to a file.
151
+ * @param options - Configuration options including the output file path
152
+ * @returns Promise that resolves when the file is written
153
+ */
154
+ export declare function streamSpeechToFile(options: GenerateSpeechOptions): Promise<void>;
155
+ /**
156
+ * Stream speech audio with generated subtitles.
157
+ * @param options - Configuration options including the subtitle file path
158
+ * @returns Promise resolving to the audio buffer and subtitles string
159
+ */
160
+ export declare function streamSpeechWithSubtitles(options: StreamSpeechWithSubtitlesOptions): Promise<{
161
+ audio: Buffer;
162
+ subtitles: string;
163
+ }>;
164
+ /**
165
+ * Get the list of available voices from the Edge TTS service.
166
+ * @param proxy - Optional proxy URL for the HTTP request
167
+ * @returns Promise resolving to an array of voice objects
168
+ */
169
+ export declare function getVoices(proxy?: string): Promise<Voice[]>;
170
+ /**
171
+ * Find voices matching specific criteria.
172
+ * @param options - Filter options for finding voices
173
+ * @param proxy - Optional proxy URL for the HTTP request
174
+ * @returns Promise resolving to an array of matching voice objects
175
+ */
176
+ export declare function findVoices(options: {
177
+ Gender?: "Female" | "Male";
178
+ Locale?: string;
179
+ Language?: string;
180
+ ShortName?: string;
181
+ }, proxy?: string): Promise<VoicesManagerVoice[]>;
182
+
183
+ declare namespace utils {
184
+ export { connectId, dateToString, escapeXml, getHeadersAndData, removeIncompatibleCharacters, splitTextByByteLength, unescapeXml };
185
+ }
186
+
187
+ export {};
package/dist/index.mjs ADDED
@@ -0,0 +1,17 @@
1
+ var t=Object.defineProperty;var a=(E,S)=>{for(var $ in S)t(E,$,{get:S[$],enumerable:!0,configurable:!0,set:(r)=>S[$]=()=>r})};import*as J from"fs";import*as l from"path";import{HttpsProxyAgent as U0}from"https-proxy-agent";import _0 from"ws";var v="6A5AA1D4EAFF4E9FB37E23D68491D6F4",m="wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=6A5AA1D4EAFF4E9FB37E23D68491D6F4",g="https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=6A5AA1D4EAFF4E9FB37E23D68491D6F4",u="en-US-EmmaMultilingualNeural";var L="130.0.2849.68".split(".")[0],Z="1-130.0.2849.68",p={"User-Agent":`Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/${L}.0.0.0 Safari/537.36 Edg/${L}.0.0.0`,"Accept-Encoding":"gzip, deflate, br, zstd","Accept-Language":"en-US,en;q=0.9"},i={...p,Pragma:"no-cache","Cache-Control":"no-cache",Origin:"chrome-extension://jdiccldimpdaibmpdkjnbmckianbfold","Sec-WebSocket-Version":"13"},d={...p,Authority:"speech.platform.bing.com","Sec-CH-UA":`" Not;A Brand";v="99", "Microsoft Edge";v="${L}", "Chromium";v="${L}"`,"Sec-CH-UA-Mobile":"?0",Accept:"*/*","Sec-Fetch-Site":"none","Sec-Fetch-Mode":"cors","Sec-Fetch-Dest":"empty"};import*as j from"crypto";class M extends Error{constructor(E){super(E);this.name="EdgeTTSException"}}class f extends M{constructor(E){super(E);this.name="UnknownResponse"}}class I extends M{constructor(E){super(E);this.name="UnexpectedResponse"}}class n extends M{constructor(E){super(E);this.name="NoAudioReceived"}}class Q extends M{constructor(E){super(E);this.name="SkewAdjustmentError"}}var e=11644473600;class N{static clockSkewSeconds=0;static adjustClockSkewSeconds(E){N.clockSkewSeconds+=E}static getUnixTimestamp(){return Date.now()/1000+N.clockSkewSeconds}static parseRFC2616Date(E){let S=Date.parse(E);if(isNaN(S))return null;return S/1000}static handleClientResponseError(E){let S=E.date||E.Date;if(!S||typeof S!=="string")throw new 
Q("No server date in headers.");let $=N.parseRFC2616Date(S);if($===null)throw new Q(`Failed to parse server date: ${S}`);let r=N.getUnixTimestamp();N.adjustClockSkewSeconds($-r)}static generateSecMsGec(){let E=N.getUnixTimestamp();E+=e,E-=E%300,E*=1e7;let S=`${E.toFixed(0)}${v}`;return j.createHash("sha256").update(S,"ascii").digest("hex").toUpperCase()}static generateMuid(){return j.randomBytes(16).toString("hex").toUpperCase()}static headersWithMuid(E){return{...E,Cookie:`muid=${N.generateMuid()};`}}}class R{voice;rate;volume;pitch;boundary;constructor(E,S="+0%",$="+0%",r="+0Hz",U="SentenceBoundary"){this.voice=E,this.rate=S,this.volume=$,this.pitch=r,this.boundary=U,this.validate()}validateStringParam(E,S,$){if(!$.test(S))throw Error(`Invalid ${E} '${S}'.`)}validate(){let E=this.voice.match(/^([a-z]{2,})-([A-Z]{2,})-(.+Neural)$/);if(E){let S=E[1],$=E[2],r=E[3];if(r&&r.includes("-"))$=`${$}-${r.split("-")[0]}`,r=r.split("-")[1];this.voice=`Microsoft Server Speech Text to Speech Voice (${S}-${$}, ${r})`}this.validateStringParam("voice",this.voice,/^Microsoft Server Speech Text to Speech Voice \(.+,.+\)$/),this.validateStringParam("rate",this.rate,/^[+-]\d+%$/),this.validateStringParam("volume",this.volume,/^[+-]\d+%$/),this.validateStringParam("pitch",this.pitch,/^[+-]\d+Hz$/)}toSSML(E){return`<speak version='1.0' xmlns='http://www.w3.org/2001/10/synthesis' xml:lang='en-US'><voice name='${this.voice}'><prosody pitch='${this.pitch}' rate='${this.rate}' volume='${this.volume}'>${E}</prosody></voice></speak>`}}var b={};a(b,{unescapeXml:()=>y,splitTextByByteLength:()=>h,removeIncompatibleCharacters:()=>c,getHeadersAndData:()=>k,escapeXml:()=>x,dateToString:()=>P,connectId:()=>D});import{v4 as E0}from"uuid";function D(){return E0().replace(/-/g,"")}function x(E){return E.replace(/[<>&'"]/g,(S)=>{switch(S){case"<":return"&lt;";case">":return"&gt;";case"&":return"&amp;";case"'":return"&apos;";case'"':return"&quot;";default:return S}})}function y(E){return 
E.replace(/&lt;/g,"<").replace(/&gt;/g,">").replace(/&amp;/g,"&").replace(/&apos;/g,"'").replace(/&quot;/g,'"')}function P(){return new Date().toUTCString()}function c(E){return(Buffer.isBuffer(E)?E.toString("utf-8"):E).split("").map(($)=>{let r=$.charCodeAt(0);if(r>=0&&r<=8||r>=11&&r<=12||r>=14&&r<=31)return" ";return $}).join("")}function S0(E,S){let $=E.subarray(0,S),r=$.lastIndexOf(10);if(r<0)r=$.lastIndexOf(32);return r}function $0(E){let S=E.length;while(S>0){let $=E.subarray(0,S);try{let r=$[$.length-1];if(r!==void 0&&(r&128)===0)return S;let U=$.toString("utf-8");if(Buffer.from(U).length===$.length&&!U.endsWith("�"))return S;S--}catch(r){S--}}return S}function r0(E,S){let $=E.subarray(0,S),r=$.lastIndexOf(38);if(r>-1){if($.indexOf(59,r)===-1)return r}return S}function*h(E,S){let $=Buffer.isBuffer(E)?E:Buffer.from(E,"utf-8");if(S<=0)throw Error("byteLength must be > 0");while($.length>S){let U=S0($,S);if(U<0)U=$0($.subarray(0,S));if(U=r0($,U),U<=0)throw Error("Maximum byte length too small for text structure");let F=$.subarray(0,U).toString("utf-8").trim();if(F.length>0)yield Buffer.from(F);$=$.subarray(U+(U>0?0:1))}let r=$.toString("utf-8").trim();if(r.length>0)yield Buffer.from(r)}function k(E,S){let $={},r=E.subarray(0,S),U=E.subarray(S+2),_=r.toString("utf-8").split(`\r
2
+ `);for(let F of _){let[O,H]=F.split(":",2);if(O&&H)$[O.trim()]=H.trim()}return{headers:$,data:U}}class X{ttsConfig;texts;options;state;constructor(E,S=u,$={}){this.options={rate:"+0%",volume:"+0%",pitch:"+0Hz",boundary:"SentenceBoundary",connectTimeoutSeconds:10,receiveTimeoutSeconds:60,...$},this.ttsConfig=new R(S,this.options.rate,this.options.volume,this.options.pitch,this.options.boundary);let r=c(E),U=x(r);this.texts=Array.from(h(U,4096)),this.state={partialText:Buffer.alloc(0),offsetCompensation:0,lastDurationOffset:0,streamWasCalled:!1}}parseMetadata(E){let S=E.toString("utf-8"),$=JSON.parse(S);for(let r of $.Metadata){let U=r.Type;if(U==="WordBoundary"||U==="SentenceBoundary"){let _=r.Data.Offset+this.state.offsetCompensation,F=r.Data.Duration;return{type:U,offset:_,duration:F,text:y(r.Data.text.Text)}}if(U==="SessionEnd")continue;throw new f(`Unknown metadata type: ${U}`)}throw new I("No boundary metadata found")}async*streamInternal(E){let S=this,$=!1,r=()=>{let W=S.ttsConfig.boundary==="WordBoundary",w=W?"true":"false",C=!W?"true":"false",K=`X-Timestamp:${P()}\r
3
+ Content-Type:application/json; charset=utf-8\r
4
+ Path:speech.config\r
5
+ \r
6
+ {"context":{"synthesis":{"audio":{"metadataoptions":{"sentenceBoundaryEnabled":"${C}","wordBoundaryEnabled":"${w}"},"outputFormat":"audio-24khz-48kbitrate-mono-mp3"}}}}`;E.send(K)},U=()=>{let W=D(),w=P(),C=S.ttsConfig.toSSML(S.state.partialText.toString("utf-8")),K=`X-RequestId:${W}\r
7
+ Content-Type:application/ssml+xml\r
8
+ X-Timestamp:${w}Z\r
9
+ Path:ssml\r
10
+ \r
11
+ `+C;E.send(K)};r(),U();let _=[],F=null,O=null,H=!1;E.on("message",(W,w)=>{if(H)return;if(_.push({data:W,isBinary:w}),F)F(),F=null}),E.on("error",(W)=>{if(H)return;if(O)O(W);H=!0}),E.on("close",()=>{if(H=!0,F)F()});while(!H||_.length>0){if(_.length===0){if(await new Promise((C,K)=>{F=C,O=K}),H&&_.length===0)break}let{data:W,isBinary:w}=_.shift();if(!w){let C=Buffer.from(W),K=C.indexOf(`\r
12
+ \r
13
+ `),{headers:B,data:T}=k(C,K),z=B.Path;if(z==="audio.metadata"){let G=S.parseMetadata(T);yield G,S.state.lastDurationOffset=(G.offset||0)+(G.duration||0)}else if(z==="turn.end"){S.state.offsetCompensation=S.state.lastDurationOffset+8750000;break}}else{let C=W;if(C.length<2)throw new I("Binary message too short for header length");let K=C.readUInt16BE(0);if(K>C.length)throw new I("Header length greater than data length");let{headers:B,data:T}=k(C,K);if(B.Path!=="audio")throw new I("Binary message path is not audio");let z=B["Content-Type"];if(!z&&T.length===0)continue;if(z!=="audio/mpeg"&&z!==void 0)throw new I(`Unexpected Content-Type: ${z}`);if(!z&&T.length>0)throw new I("No Content-Type but got data");if(T.length===0)throw new I("Audio data is empty");$=!0,yield{type:"audio",data:T}}}if(!$)throw new n("No audio received from service.")}async*stream(){if(this.state.streamWasCalled)throw Error("stream() can only be called once.");this.state.streamWasCalled=!0;let E=this.options.proxy?new U0(this.options.proxy):void 0,S=`${m}&ConnectionId=${D()}&Sec-MS-GEC=${N.generateSecMsGec()}&Sec-MS-GEC-Version=${Z}`;for(let $ of this.texts){this.state.partialText=$;let r=0;while(!0)try{let U=new _0(S,{headers:N.headersWithMuid(i),agent:E,perMessageDeflate:!1});await new Promise((F,O)=>{U.once("open",F),U.once("error",O)});let _=this.streamInternal(U);for await(let F of _)yield F;U.close();break}catch(U){if(U.name==="UnexpectedServerResponse"&&U.code===403&&r===0)throw U;throw U}}}}function o(E){let S=new Date(E),$=Math.floor(E/3600000),r=S.getUTCMinutes(),U=S.getUTCSeconds(),_=S.getUTCMilliseconds();return`${$.toString().padStart(2,"0")}:${r.toString().padStart(2,"0")}:${U.toString().padStart(2,"0")},${_.toString().padStart(3,"0")}`}class V{cues=[];type=null;feed(E){if(E.type!=="WordBoundary"&&E.type!=="SentenceBoundary")throw Error("Invalid message type, expected 'WordBoundary' or 'SentenceBoundary'.");if(this.type===null)this.type=E.type;else if(this.type!==E.type)throw 
Error(`Expected message type '${this.type}', but got '${E.type}'.`);if(E.offset===void 0||E.duration===void 0||E.text===void 0)return;let S=E.offset/1e4,$=E.duration/1e4;this.cues.push({index:this.cues.length+1,start:S,end:S+$,content:E.text})}getSrt(){return this.cues.map((E)=>{return`${E.index}
14
+ ${o(E.start)} --> ${o(E.end)}
15
+ ${E.content}
16
+
17
+ `}).join("")}}import s from"axios";import{HttpsProxyAgent as F0}from"https-proxy-agent";async function N0(E){let S=`${g}&Sec-MS-GEC=${N.generateSecMsGec()}&Sec-MS-GEC-Version=${Z}`,$=N.headersWithMuid(d),r=E?new F0(E):void 0,U=await s.get(S,{headers:$,httpsAgent:r,proxy:!1,validateStatus:(_)=>_<500});if(U.status===403){N.handleClientResponseError(U.headers);let _=`${g}&Sec-MS-GEC=${N.generateSecMsGec()}&Sec-MS-GEC-Version=${Z}`;return(await s.get(_,{headers:N.headersWithMuid(d),httpsAgent:r,proxy:!1})).data}if(U.status>=400)throw Error(`Failed to list voices: ${U.status} ${U.statusText}`);return U.data}async function q(E){let S=await N0(E);return S.forEach(($)=>{if(!$.VoiceTag)$.VoiceTag={ContentCategories:[],VoicePersonalities:[]};if(!$.VoiceTag.ContentCategories)$.VoiceTag.ContentCategories=[];if(!$.VoiceTag.VoicePersonalities)$.VoiceTag.VoicePersonalities=[]}),S}class A{voices=[];createCalled=!1;static async create(E){let S=new A,$=E||await q();return S.voices=$.map((r)=>({...r,Language:r.Locale.split("-")[0]??""})),S.createCalled=!0,S}find(E){if(!this.createCalled)throw Error("VoicesManager.find() called before VoicesManager.create()");return this.voices.filter((S)=>{let $=!0;if(E.Gender&&S.Gender!==E.Gender)$=!1;if(E.Locale&&S.Locale!==E.Locale)$=!1;if(E.Language&&S.Language!==E.Language)$=!1;if(E.ShortName&&S.ShortName!==E.ShortName)$=!1;return $})}}var f0={Communicate:X,SubMaker:V,VoicesManager:A,listVoices:q,DRM:N,TTSConfig:R,utils:b};async function C0(E){let{text:S,voice:$="en-US-EmmaMultilingualNeural",rate:r="+0%",volume:U="+0%",pitch:_="+0Hz",boundary:F="SentenceBoundary",proxy:O,connectTimeoutSeconds:H,receiveTimeoutSeconds:W}=E,w=new X(S,$,{rate:r,volume:U,pitch:_,boundary:F,proxy:O,connectTimeoutSeconds:H,receiveTimeoutSeconds:W}),C=[];for await(let K of w.stream())if(K.type==="audio"&&K.data)C.push(K.data);return Buffer.concat(C)}async function n0(E){let{outputPath:S,...$}=E,r=await 
C0($),U=l.dirname(S);if(!J.existsSync(U))J.mkdirSync(U,{recursive:!0});J.writeFileSync(S,r)}async function x0(E){let{text:S,voice:$="en-US-EmmaMultilingualNeural",rate:r="+0%",volume:U="+0%",pitch:_="+0Hz",boundary:F="WordBoundary",proxy:O,connectTimeoutSeconds:H,receiveTimeoutSeconds:W,subtitlePath:w}=E,C=new X(S,$,{rate:r,volume:U,pitch:_,boundary:F,proxy:O,connectTimeoutSeconds:H,receiveTimeoutSeconds:W}),K=new V,B=[];for await(let Y of C.stream())if(Y.type==="audio"&&Y.data)B.push(Y.data);else if(Y.type==="WordBoundary"||Y.type==="SentenceBoundary")K.feed(Y);let T=Buffer.concat(B),z=K.getSrt(),G=l.dirname(w);if(!J.existsSync(G))J.mkdirSync(G,{recursive:!0});return J.writeFileSync(w,z),{audio:T,subtitles:z}}async function y0(E){return q(E)}async function c0(E,S){return(await A.create(S?await q(S):void 0)).find(E)}export{x0 as streamSpeechWithSubtitles,n0 as streamSpeechToFile,C0 as streamSpeech,y0 as getVoices,c0 as findVoices,f0 as Raw};
package/package.json CHANGED
@@ -1,50 +1,58 @@
1
1
  {
2
2
  "name": "@bestcodes/edge-tts",
3
- "version": "1.0.2",
4
- "description": "An Azure Speech Service module that uses the Microsoft Edge Read Aloud API.",
5
- "author": "The-Best-Codes",
6
- "license": "MIT",
3
+ "description": "Free in-browser Text-to-Speech using Microsoft Edge TTS",
7
4
  "repository": {
8
5
  "type": "git",
9
- "url": "https://github.com/The-Best-Codes/edge-tts.git"
10
- },
11
- "bugs": {
12
- "url": "https://github.com/The-Best-Codes/edge-tts/issues"
6
+ "url": "git+https://github.com/The-Best-Codes/edge-tts.git"
13
7
  },
14
- "homepage": "https://github.com/The-Best-Codes/edge-tts#readme",
15
8
  "keywords": [
16
9
  "tts",
17
- "text-to-speech",
10
+ "text to speech",
18
11
  "speech",
19
- "text",
20
- "voice",
21
- "ssml",
22
- "azure",
23
- "speech-synthesis",
24
- "readaloud",
25
12
  "edge",
26
- "microsoft"
13
+ "edgetts"
27
14
  ],
28
- "module": "./dist/index",
29
- "main": "./dist/index",
15
+ "license": "MIT",
16
+ "bugs": {
17
+ "url": "https://github.com/The-Best-Codes/edge-tts/issues"
18
+ },
19
+ "files": [
20
+ "dist",
21
+ "package.json",
22
+ "README.md"
23
+ ],
24
+ "homepage": "https://github.com/The-Best-Codes/edge-tts",
25
+ "version": "2.0.0",
26
+ "author": {
27
+ "name": "The-Best-Codes",
28
+ "url": "https://bestcodes.dev"
29
+ },
30
+ "module": "dist/index.mjs",
31
+ "types": "dist/index.d.ts",
32
+ "type": "module",
30
33
  "scripts": {
31
- "minify": "find dist/ -name '*.js' -type f -exec terser {} --compress --mangle --output {} \\;",
32
- "build": "tsc --project tsconfig.json --outDir dist && npm run minify",
33
- "prepublishOnly": "npm run build",
34
- "depclean": "sh depclean.sh",
35
- "update": "npm update --save && npm update --save-dev && npx npm-check-updates@latest -u && npm install",
36
- "maintain": "npm run depclean && npm run update"
34
+ "prepublishOnly": "bun run b",
35
+ "b": "rm -rf dist && bun run build:main && bun run build:types",
36
+ "build:main": "bun build src/index.ts --outfile dist/index.mjs --format esm --packages external --minify --target browser",
37
+ "build:types": "dts-bundle-generator src/index.ts -o dist/index.d.ts --external-imports",
38
+ "typecheck": "tsc -b --incremental --noEmit",
39
+ "fmt": "prettier . -w --cache --cache-strategy metadata"
37
40
  },
38
41
  "devDependencies": {
39
- "@types/node": "^22.8.1",
40
- "@types/randombytes": "^2.0.3",
41
- "axios": "^1.7.7",
42
- "buffer": "^6.0.3",
43
- "isomorphic-ws": "^5.0.0",
44
- "randombytes": "^2.1.0",
45
- "terser": "^5.36.0"
42
+ "@types/bun": "^1.3.5",
43
+ "@types/node": "^25.0.3",
44
+ "@types/uuid": "^11.0.0",
45
+ "@types/ws": "^8.18.1",
46
+ "dts-bundle-generator": "^9.5.1",
47
+ "prettier": "^3.7.4"
46
48
  },
47
- "files": [
48
- "dist/"
49
- ]
49
+ "peerDependencies": {
50
+ "typescript": "^5.9.3"
51
+ },
52
+ "dependencies": {
53
+ "axios": "^1.13.2",
54
+ "https-proxy-agent": "^7.0.6",
55
+ "uuid": "^13.0.0",
56
+ "ws": "^8.18.3"
57
+ }
50
58
  }
package/LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2024 The-Best-Codes
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
package/dist/MsEdgeTTS.js DELETED
@@ -1 +0,0 @@
1
- "use strict";var __assign=this&&this.__assign||function(){return __assign=Object.assign||function(t){for(var e,n=1,o=arguments.length;n<o;n++)for(var a in e=arguments[n])Object.prototype.hasOwnProperty.call(e,a)&&(t[a]=e[a]);return t},__assign.apply(this,arguments)},__createBinding=this&&this.__createBinding||(Object.create?function(t,e,n,o){void 0===o&&(o=n);var a=Object.getOwnPropertyDescriptor(e,n);a&&!("get"in a?!e.__esModule:a.writable||a.configurable)||(a={enumerable:!0,get:function(){return e[n]}}),Object.defineProperty(t,o,a)}:function(t,e,n,o){void 0===o&&(o=n),t[o]=e[n]}),__setModuleDefault=this&&this.__setModuleDefault||(Object.create?function(t,e){Object.defineProperty(t,"default",{enumerable:!0,value:e})}:function(t,e){t.default=e}),__importStar=this&&this.__importStar||function(t){if(t&&t.__esModule)return t;var e={};if(null!=t)for(var n in t)"default"!==n&&Object.prototype.hasOwnProperty.call(t,n)&&__createBinding(e,t,n);return __setModuleDefault(e,t),e},__awaiter=this&&this.__awaiter||function(t,e,n,o){return new(n||(n=Promise))((function(a,r){function s(t){try{c(o.next(t))}catch(t){r(t)}}function i(t){try{c(o.throw(t))}catch(t){r(t)}}function c(t){var e;t.done?a(t.value):(e=t.value,e instanceof n?e:new n((function(t){t(e)}))).then(s,i)}c((o=o.apply(t,e||[])).next())}))},__generator=this&&this.__generator||function(t,e){var n,o,a,r={label:0,sent:function(){if(1&a[0])throw a[1];return a[1]},trys:[],ops:[]},s=Object.create(("function"==typeof Iterator?Iterator:Object).prototype);return s.next=i(0),s.throw=i(1),s.return=i(2),"function"==typeof Symbol&&(s[Symbol.iterator]=function(){return this}),s;function i(i){return function(c){return function(i){if(n)throw new TypeError("Generator is already executing.");for(;s&&(s=0,i[0]&&(r=0)),r;)try{if(n=1,o&&(a=2&i[0]?o.return:i[0]?o.throw||((a=o.return)&&a.call(o),0):o.next)&&!(a=a.call(o,i[1])).done)return a;switch(o=0,a&&(i=[2&i[0],a.value]),i[0]){case 0:case 1:a=i;break;case 4:return 
r.label++,{value:i[1],done:!1};case 5:r.label++,o=i[1],i=[0];continue;case 7:i=r.ops.pop(),r.trys.pop();continue;default:if(!(a=r.trys,(a=a.length>0&&a[a.length-1])||6!==i[0]&&2!==i[0])){r=0;continue}if(3===i[0]&&(!a||i[1]>a[0]&&i[1]<a[3])){r.label=i[1];break}if(6===i[0]&&r.label<a[1]){r.label=a[1],a=i;break}if(a&&r.label<a[2]){r.label=a[2],r.ops.push(i);break}a[2]&&r.ops.pop(),r.trys.pop();continue}i=e.call(t,r)}catch(t){i=[6,t],o=0}finally{n=a=0}if(5&i[0])throw i[1];return{value:i[0]?i[1]:void 0,done:!0}}([i,c])}}},__importDefault=this&&this.__importDefault||function(t){return t&&t.__esModule?t:{default:t}};Object.defineProperty(exports,"__esModule",{value:!0}),exports.MsEdgeTTS=exports.MetadataOptions=exports.ProsodyOptions=void 0;var axios_1=__importDefault(require("axios")),isomorphic_ws_1=__importDefault(require("isomorphic-ws")),buffer_1=require("buffer/"),randombytes_1=__importDefault(require("randombytes")),OUTPUT_FORMAT_1=require("./OUTPUT_FORMAT"),stream_1=require("stream"),fs=__importStar(require("fs")),ProsodyOptions=function(){this.pitch="+0Hz",this.rate=1,this.volume=100};exports.ProsodyOptions=ProsodyOptions;var messageTypes,MetadataOptions=function(){this.sentenceBoundaryEnabled=!1,this.wordBoundaryEnabled=!1};exports.MetadataOptions=MetadataOptions,function(t){t.TURN_START="turn.start",t.TURN_END="turn.end",t.RESPONSE="response",t.SPEECH_CONFIG="speech.config",t.AUDIO_METADATA="audio.metadata",t.AUDIO="audio",t.SSML="ssml"}(messageTypes||(messageTypes={}));var MsEdgeTTS=function(){function t(t,e){void 0===e&&(e=!1),this._metadataOptions=new MetadataOptions,this._streams={},this._startTime=0,this._agent=t,this._enableLogger=e,this._isBrowser="undefined"!=typeof window&&void 0!==window.document}return t.prototype._log=function(){for(var t=[],e=0;e<arguments.length;e++)t[e]=arguments[e];this._enableLogger&&console.log.apply(console,t)},t.prototype._send=function(t){return __awaiter(this,void 0,void 0,(function(){var e,n=this;return 
__generator(this,(function(o){switch(o.label){case 0:e=1,o.label=1;case 1:return e<=3&&this._ws.readyState!==this._ws.OPEN?(1==e&&(this._startTime=Date.now()),this._log("connecting: ",e),[4,this._initClient()]):[3,4];case 2:o.sent(),o.label=3;case 3:return e++,[3,1];case 4:return this._ws.send(t,(function(){n._log("<-",t)})),[2]}}))}))},t.prototype._initClient=function(){var e=this;return this._ws=this._isBrowser?new isomorphic_ws_1.default(t.SYNTH_URL):new isomorphic_ws_1.default(t.SYNTH_URL,{agent:this._agent}),this._ws.binaryType="arraybuffer",new Promise((function(n,o){e._ws.onopen=function(){e._log("Connected in",(Date.now()-e._startTime)/1e3,"seconds"),e._send("Content-Type:application/json; charset=utf-8\r\nPath:".concat(messageTypes.SPEECH_CONFIG).concat(t.JSON_XML_DELIM,'\n {\n "context": {\n "synthesis": {\n "audio": {\n "metadataoptions": {\n "sentenceBoundaryEnabled": "').concat(e._metadataOptions.sentenceBoundaryEnabled,'",\n "wordBoundaryEnabled": "').concat(e._metadataOptions.wordBoundaryEnabled,'"\n },\n "outputFormat": "').concat(e._outputFormat,'" \n }\n }\n }\n }\n ')).then(n)},e._ws.onmessage=function(n){var o,a=buffer_1.Buffer.from(n.data),r=a.toString(),s=(null===(o=/X-RequestId:(.*?)\r\n/gm.exec(r))||void 0===o?void 0:o[1])||"";if(r.includes("Path:".concat(messageTypes.TURN_START)))e._log("->",r);else if(r.includes("Path:".concat(messageTypes.TURN_END)))e._log("->",r),e._streams[s].audio.push(null);else if(r.includes("Path:".concat(messageTypes.RESPONSE)))e._log("->",r);else if(r.includes("Path:".concat(messageTypes.AUDIO_METADATA))){var i=a.indexOf(t.JSON_XML_DELIM)+t.JSON_XML_DELIM.length,c=a.subarray(i);e._log("->",r),e._pushMetadata(c,s)}else if(r.includes("Path:".concat(messageTypes.AUDIO))&&n.data instanceof ArrayBuffer){i=a.indexOf(t.AUDIO_DELIM)+t.AUDIO_DELIM.length;var u=a.subarray(0,i).toString();c=a.subarray(i);e._log("->",u),e._pushAudioData(c,s)}else e._log("->","UNKNOWN MESSAGE",r)},e._ws.onclose=function(){for(var t in 
e._log("disconnected after:",(Date.now()-e._startTime)/1e3,"seconds"),e._streams)e._streams[t].audio.push(null)},e._ws.onerror=function(t){o("Connect Error: "+JSON.stringify(t,null,2))}}))},t.prototype._pushAudioData=function(t,e){this._streams[e].audio.push(t)},t.prototype._pushMetadata=function(t,e){this._streams[e].metadata.push(t)},t.prototype._SSMLTemplate=function(t,e){return void 0===e&&(e={}),e=__assign(__assign({},new ProsodyOptions),e),'<speak version="1.0" xmlns="http://www.w3.org/2001/10/synthesis" xmlns:mstts="https://www.w3.org/2001/mstts" xml:lang="'.concat(this._metadataOptions.voiceLocale,'">\n <voice name="').concat(this._voice,'">\n <prosody pitch="').concat(e.pitch,'" rate="').concat(e.rate,'" volume="').concat(e.volume,'">\n ').concat(t,"\n </prosody> \n </voice>\n </speak>")},t.prototype.getVoices=function(){return new Promise((function(e,n){axios_1.default.get(t.VOICES_URL).then((function(t){return e(t.data)})).catch(n)}))},t.prototype.setMetadata=function(e,n,o){return __awaiter(this,void 0,void 0,(function(){var a,r,s,i;return __generator(this,(function(c){switch(c.label){case 0:if(a=this._voice,r=this._outputFormat,s=JSON.stringify(this._metadataOptions),this._voice=e,!this._metadataOptions.voiceLocale||o&&!o.voiceLocale&&a!==this._voice){if(!(i=t.VOICE_LANG_REGEX.exec(this._voice)))throw new Error("Could not infer voiceLocale from voiceName, and no voiceLocale was specified!");this._metadataOptions.voiceLocale=i[0]}return this._outputFormat=n,Object.assign(this._metadataOptions,o),a!==this._voice||r!==this._outputFormat||s!==JSON.stringify(this._metadataOptions)||this._ws.readyState!==this._ws.OPEN?(this._startTime=Date.now(),[4,this._initClient()]):[2];case 1:return c.sent(),[2]}}))}))},t.prototype._metadataCheck=function(){if(!this._ws)throw new Error("Speech synthesis not configured yet. 
Run setMetadata before calling toStream or toFile.")},t.prototype.close=function(){this._ws.close()},t.prototype.toFile=function(t,e,n){return this._rawSSMLRequestToFile(t,this._SSMLTemplate(e,n))},t.prototype.toStream=function(t,e){return this._rawSSMLRequest(this._SSMLTemplate(t,e)).audioStream},t.prototype.rawToFile=function(t,e){return this._rawSSMLRequestToFile(t,e)},t.prototype.rawToStream=function(t){return this._rawSSMLRequest(t).audioStream},t.prototype._rawSSMLRequestToFile=function(t,e){return __awaiter(this,void 0,void 0,(function(){var n,o,a,r,s,i,c,u=this;return __generator(this,(function(_){switch(_.label){case 0:n=this._rawSSMLRequest(e),o=n.audioStream,a=n.metadataStream,n.requestId,_.label=1;case 1:return _.trys.push([1,3,,4]),[4,Promise.all([new Promise((function(e,n){var r=o.pipe(fs.createWriteStream(t+"/example_audio.webm"));r.once("close",(function(){return __awaiter(u,void 0,void 0,(function(){return __generator(this,(function(o){return r.bytesWritten>0?e(t+"/example_audio.webm"):n("No audio data received"),[2]}))}))})),a.once("error",n)})),new Promise((function(e,n){var o=[];a.on("data",(function(t){var e=JSON.parse(t.toString());o.push.apply(o,e.Metadata)})),a.on("close",(function(){var n=t+"/example_metadata.json";fs.writeFileSync(n,JSON.stringify(o,null,2)),e(n)})),a.once("error",n)}))])];case 2:return r=_.sent(),s=r[0],i=r[1],[2,{audioFilePath:s,metadataFilePath:i}];case 3:throw c=_.sent(),o.destroy(),a.destroy(),c;case 4:return[2]}}))}))},t.prototype._rawSSMLRequest=function(e){this._metadataCheck();var n=(0,randombytes_1.default)(16).toString("hex"),o="X-RequestId:".concat(n,"\r\nContent-Type:application/ssml+xml\r\nPath:").concat(messageTypes.SSML).concat(t.JSON_XML_DELIM)+e.trim(),a=this,r=new stream_1.Readable({read:function(){},destroy:function(t,e){delete a._streams[n],e(t)}}),s=new stream_1.Readable({read:function(){}});return 
r.on("error",(function(t){r.destroy(),s.destroy()})),r.once("close",(function(){r.destroy(),s.destroy()})),this._streams[n]={audio:r,metadata:s},this._send(o).then(),{audioStream:r,metadataStream:s,requestId:n}},t.OUTPUT_FORMAT=OUTPUT_FORMAT_1.OUTPUT_FORMAT,t.TRUSTED_CLIENT_TOKEN="6A5AA1D4EAFF4E9FB37E23D68491D6F4",t.VOICES_URL="https://speech.platform.bing.com/consumer/speech/synthesize/readaloud/voices/list?trustedclienttoken=".concat(t.TRUSTED_CLIENT_TOKEN),t.SYNTH_URL="wss://speech.platform.bing.com/consumer/speech/synthesize/readaloud/edge/v1?TrustedClientToken=".concat(t.TRUSTED_CLIENT_TOKEN),t.JSON_XML_DELIM="\r\n\r\n",t.AUDIO_DELIM="Path:audio\r\n",t.VOICE_LANG_REGEX=/\w{2}-\w{2}/,t}();exports.MsEdgeTTS=MsEdgeTTS;
@@ -1 +0,0 @@
1
- "use strict";var OUTPUT_FORMAT;Object.defineProperty(exports,"__esModule",{value:!0}),exports.OUTPUT_FORMAT=void 0,function(O){O.AUDIO_24KHZ_48KBITRATE_MONO_MP3="audio-24khz-48kbitrate-mono-mp3",O.AUDIO_24KHZ_96KBITRATE_MONO_MP3="audio-24khz-96kbitrate-mono-mp3",O.WEBM_24KHZ_16BIT_MONO_OPUS="webm-24khz-16bit-mono-opus"}(OUTPUT_FORMAT||(exports.OUTPUT_FORMAT=OUTPUT_FORMAT={}));
package/dist/PITCH.js DELETED
@@ -1 +0,0 @@
1
- "use strict";var PITCH;Object.defineProperty(exports,"__esModule",{value:!0}),exports.PITCH=void 0,function(e){e.X_LOW="x-low",e.LOW="low",e.MEDIUM="medium",e.HIGH="high",e.X_HIGH="x-high",e.DEFAULT="default"}(PITCH||(exports.PITCH=PITCH={}));
package/dist/RATE.js DELETED
@@ -1 +0,0 @@
1
- "use strict";var RATE;Object.defineProperty(exports,"__esModule",{value:!0}),exports.RATE=void 0,function(e){e.X_SLOW="x-slow",e.SLOW="slow",e.MEDIUM="medium",e.FAST="fast",e.X_FAST="x-fast",e.DEFAULT="default"}(RATE||(exports.RATE=RATE={}));
package/dist/VOLUME.js DELETED
@@ -1 +0,0 @@
1
- "use strict";var VOLUME;Object.defineProperty(exports,"__esModule",{value:!0}),exports.VOLUME=void 0,function(e){e.SILENT="silent",e.X_SOFT="x-soft",e.SOFT="soft",e.MEDIUM="medium",e.LOUD="loud",e.X_LOUD="x-LOUD",e.DEFAULT="default"}(VOLUME||(exports.VOLUME=VOLUME={}));
package/dist/index.js DELETED
@@ -1 +0,0 @@
1
- "use strict";Object.defineProperty(exports,"__esModule",{value:!0}),exports.VOLUME=exports.RATE=exports.PITCH=exports.OUTPUT_FORMAT=exports.ProsodyOptions=exports.MsEdgeTTS=void 0;var MsEdgeTTS_1=require("./MsEdgeTTS");Object.defineProperty(exports,"MsEdgeTTS",{enumerable:!0,get:function(){return MsEdgeTTS_1.MsEdgeTTS}}),Object.defineProperty(exports,"ProsodyOptions",{enumerable:!0,get:function(){return MsEdgeTTS_1.ProsodyOptions}});var OUTPUT_FORMAT_1=require("./OUTPUT_FORMAT");Object.defineProperty(exports,"OUTPUT_FORMAT",{enumerable:!0,get:function(){return OUTPUT_FORMAT_1.OUTPUT_FORMAT}});var PITCH_1=require("./PITCH");Object.defineProperty(exports,"PITCH",{enumerable:!0,get:function(){return PITCH_1.PITCH}});var RATE_1=require("./RATE");Object.defineProperty(exports,"RATE",{enumerable:!0,get:function(){return RATE_1.RATE}});var VOLUME_1=require("./VOLUME");Object.defineProperty(exports,"VOLUME",{enumerable:!0,get:function(){return VOLUME_1.VOLUME}});
@@ -1,141 +0,0 @@
1
- import { OUTPUT_FORMAT } from "./OUTPUT_FORMAT";
2
- import { Readable } from "stream";
3
- import { Agent } from "http";
4
- import { PITCH } from "./PITCH";
5
- import { RATE } from "./RATE";
6
- import { VOLUME } from "./VOLUME";
7
- export type Voice = {
8
- Name: string;
9
- ShortName: string;
10
- Gender: string;
11
- Locale: string;
12
- SuggestedCodec: string;
13
- FriendlyName: string;
14
- Status: string;
15
- };
16
- export declare class ProsodyOptions {
17
- /**
18
- * The pitch to use.
19
- * Can be any {@link PITCH}, or a relative frequency in Hz (+50Hz), a relative semitone (+2st), or a relative percentage (+50%).
20
- * [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,pitch,-Indicates%20the%20baseline)
21
- */
22
- pitch?: PITCH | string;
23
- /**
24
- * The rate to use.
25
- * Can be any {@link RATE}, or a relative number (0.5), or string with a relative percentage (+50%).
26
- * [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,rate,-Indicates%20the%20speaking)
27
- */
28
- rate?: RATE | string | number;
29
- /**
30
- * The volume to use.
31
- * Can be any {@link VOLUME}, or an absolute number (0, 100), a string with a relative number (+50), or a relative percentage (+50%).
32
- * [SSML documentation](https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,volume,-Indicates%20the%20volume)
33
- */
34
- volume?: VOLUME | string | number;
35
- }
36
- export declare class MetadataOptions {
37
- /**
38
- * (optional) any voice locale that is supported by the voice. See the list of all voices for compatibility. If not provided, the locale will be inferred from the `voiceName`.
39
- * Changing the voiceName will reset the voiceLocale.
40
- */
41
- voiceLocale?: string;
42
- /**
43
- * (optional) whether to enable sentence boundary metadata. Default is `false`
44
- */
45
- sentenceBoundaryEnabled?: boolean;
46
- /**
47
- * (optional) whether to enable word boundary metadata. Default is `false`
48
- */
49
- wordBoundaryEnabled?: boolean;
50
- }
51
- export declare class MsEdgeTTS {
52
- static OUTPUT_FORMAT: typeof OUTPUT_FORMAT;
53
- private static TRUSTED_CLIENT_TOKEN;
54
- private static VOICES_URL;
55
- private static SYNTH_URL;
56
- private static JSON_XML_DELIM;
57
- private static AUDIO_DELIM;
58
- private static VOICE_LANG_REGEX;
59
- private readonly _enableLogger;
60
- private readonly _isBrowser;
61
- private _ws;
62
- private _voice;
63
- private _outputFormat;
64
- private _metadataOptions;
65
- private _streams;
66
- private _startTime;
67
- private readonly _agent;
68
- private _log;
69
- /**
70
- * Create a new `MsEdgeTTS` instance.
71
- *
72
- * @param agent (optional, **NOT SUPPORTED IN BROWSER**) Use a custom http.Agent implementation like [https-proxy-agent](https://github.com/TooTallNate/proxy-agents) or [socks-proxy-agent](https://github.com/TooTallNate/proxy-agents/tree/main/packages/socks-proxy-agent).
73
- * @param enableLogger=false whether to enable the built-in logger. This logs connection inits, disconnects, and incoming data to the console
74
- */
75
- constructor(agent?: Agent, enableLogger?: boolean);
76
- private _send;
77
- private _initClient;
78
- private _pushAudioData;
79
- private _pushMetadata;
80
- private _SSMLTemplate;
81
- /**
82
- * Fetch the list of voices available in Microsoft Edge.
83
- * These, however, are not all. The complete list of voices supported by this module [can be found here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support) (neural, standard, and preview).
84
- */
85
- getVoices(): Promise<Voice[]>;
86
- /**
87
- * Sets the required information for the speech to be synthesised and inits a new WebSocket connection.
88
- * Must be called at least once before text can be synthesised.
89
- * Saved in this instance. Can be called at any time to update the metadata.
90
- *
91
- * @param voiceName a string with any `ShortName`. A list of all available neural voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#neural-voices). However, it is not limited to neural voices: standard voices can also be used. A list of standard voices can be found [here](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/language-support#standard-voices). Changing the voiceName will reset the voiceLocale.
92
- * @param outputFormat any {@link OUTPUT_FORMAT}
93
- * @param metadataOptions (optional) {@link MetadataOptions}
94
- */
95
- setMetadata(voiceName: string, outputFormat: OUTPUT_FORMAT, metadataOptions?: MetadataOptions): Promise<void>;
96
- private _metadataCheck;
97
- /**
98
- * Close the WebSocket connection.
99
- */
100
- close(): void;
101
- /**
102
- * Writes raw audio synthesised from text to a file. Uses a basic {@link _SSMLTemplate SSML template}.
103
- *
104
- * @param dirPath a valid output directory path
105
- * @param input the input to synthesise
106
- * @param options (optional) {@link ProsodyOptions}
107
- * @returns {Promise<{audioFilePath: string, metadataFilePath: string}>} - a `Promise` with the full filepaths
108
- */
109
- toFile(dirPath: string, input: string, options?: ProsodyOptions): Promise<{
110
- audioFilePath: string;
111
- metadataFilePath: string;
112
- }>;
113
- /**
114
- * Writes raw audio synthesised from text in real-time to a {@link Readable}. Uses a basic {@link _SSMLTemplate SSML template}.
115
- *
116
- * @param input the text to synthesise. Can include SSML elements.
117
- * @param options (optional) {@link ProsodyOptions}
118
- * @returns {Readable} - a `stream.Readable` with the audio data
119
- */
120
- toStream(input: string, options?: ProsodyOptions): Readable;
121
- /**
122
- * Writes raw audio synthesised from text to a file. Has no SSML template. Basic SSML should be provided in the request.
123
- *
124
- * @param dirPath a valid output directory path.
125
- * @param requestSSML the SSML to send. SSML elements required in order to work.
126
- * @returns {Promise<{audioFilePath: string, metadataFilePath: string}>} - a `Promise` with the full filepaths
127
- */
128
- rawToFile(dirPath: string, requestSSML: string): Promise<{
129
- audioFilePath: string;
130
- metadataFilePath: string;
131
- }>;
132
- /**
133
- * Writes raw audio synthesised from a request in real-time to a {@link Readable}. Has no SSML template. Basic SSML should be provided in the request.
134
- *
135
- * @param requestSSML the SSML to send. SSML elements required in order to work.
136
- * @returns {Readable} - a `stream.Readable` with the audio data
137
- */
138
- rawToStream(requestSSML: string): Readable;
139
- private _rawSSMLRequestToFile;
140
- private _rawSSMLRequest;
141
- }
@@ -1,8 +0,0 @@
1
- /**
2
- * Only a few of the [possible formats](https://docs.microsoft.com/en-us/azure/cognitive-services/speech-service/rest-text-to-speech#audio-outputs) are accepted.
3
- */
4
- export declare enum OUTPUT_FORMAT {
5
- AUDIO_24KHZ_48KBITRATE_MONO_MP3 = "audio-24khz-48kbitrate-mono-mp3",
6
- AUDIO_24KHZ_96KBITRATE_MONO_MP3 = "audio-24khz-96kbitrate-mono-mp3",
7
- WEBM_24KHZ_16BIT_MONO_OPUS = "webm-24khz-16bit-mono-opus"
8
- }
@@ -1,11 +0,0 @@
1
- /**
2
- * https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,pitch,-Indicates%20the%20baseline
3
- */
4
- export declare enum PITCH {
5
- X_LOW = "x-low",
6
- LOW = "low",
7
- MEDIUM = "medium",
8
- HIGH = "high",
9
- X_HIGH = "x-high",
10
- DEFAULT = "default"
11
- }
@@ -1,11 +0,0 @@
1
- /**
2
- * https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,rate,-Indicates%20the%20speaking
3
- */
4
- export declare enum RATE {
5
- X_SLOW = "x-slow",
6
- SLOW = "slow",
7
- MEDIUM = "medium",
8
- FAST = "fast",
9
- X_FAST = "x-fast",
10
- DEFAULT = "default"
11
- }
@@ -1,12 +0,0 @@
1
- /**
2
- * https://learn.microsoft.com/en-us/azure/ai-services/speech-service/speech-synthesis-markup-voice#:~:text=Optional-,volume,-Indicates%20the%20volume
3
- */
4
- export declare enum VOLUME {
5
- SILENT = "silent",
6
- X_SOFT = "x-soft",
7
- SOFT = "soft",
8
- MEDIUM = "medium",
9
- LOUD = "loud",
10
- X_LOUD = "x-LOUD",
11
- DEFAULT = "default"
12
- }
@@ -1,5 +0,0 @@
1
- export { MsEdgeTTS, Voice, ProsodyOptions } from "./MsEdgeTTS";
2
- export { OUTPUT_FORMAT } from "./OUTPUT_FORMAT";
3
- export { PITCH } from "./PITCH";
4
- export { RATE } from "./RATE";
5
- export { VOLUME } from "./VOLUME";