@creativeorange/azure-text-to-speech 1.1.7 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
package/src/main.ts CHANGED
@@ -1,346 +1,2 @@
1
- import {
2
- SpeakerAudioDestination,
3
- AudioConfig,
4
- SpeechConfig,
5
- SpeechSynthesizer,
6
- SpeechSynthesisOutputFormat,
7
- } from 'microsoft-cognitiveservices-speech-sdk';
8
-
9
- export default class TextToSpeech {
10
- key: string;
11
- region: string;
12
- voice: string;
13
- rate: number;
14
- pitch: number;
15
-
16
- textToRead: string = '';
17
-
18
- wordBoundryList: any[] = [];
19
-
20
- clickedNode: any;
21
- highlightDiv: any;
22
-
23
- speechConfig: any;
24
- audioConfig: any;
25
- player: any;
26
- synthesizer: any;
27
-
28
- previousWordBoundary: any;
29
-
30
- interval: any;
31
-
32
- wordEncounters: number[] = [];
33
- originalHighlightDivInnerHTML: string = '';
34
- currentWord: string = '';
35
- currentOffset: number = 0;
36
- wordBoundaryOffset: number = 0;
37
-
38
-
39
- constructor(key: string, region: string, voice: string, rate: number = 0, pitch: number = 0) {
40
- this.key = key;
41
- this.region = region;
42
- this.voice = voice;
43
- this.rate = rate;
44
- this.pitch = pitch;
45
- }
46
-
47
- async start() {
48
- await this.registerBindings(document);
49
- }
50
-
51
- async registerBindings(node: any) {
52
- const nodes = node.childNodes;
53
- for (let i = 0; i < nodes.length; i++) {
54
- if (!nodes[i]) {
55
- continue;
56
- }
57
-
58
- const currentNode = nodes[i];
59
-
60
- if (currentNode.attributes) {
61
- if (currentNode.attributes.getNamedItem('co-tts.id')) {
62
- await this.handleIdModifier(currentNode, currentNode.attributes.getNamedItem('co-tts.id'));
63
- } else if (currentNode.attributes.getNamedItem('co-tts.ajax')) {
64
- await this.handleAjaxModifier(currentNode, currentNode.attributes.getNamedItem('co-tts.ajax'));
65
- } else if (currentNode.attributes.getNamedItem('co-tts')) {
66
- await this.handleDefault(currentNode, currentNode.attributes.getNamedItem('co-tts'));
67
- } else if (currentNode.attributes.getNamedItem('co-tts.stop')) {
68
- await this.handleStopModifier(currentNode, currentNode.attributes.getNamedItem('co-tts.stop'));
69
- } else if (currentNode.attributes.getNamedItem('co-tts.resume')) {
70
- await this.handleResumeModifier(currentNode, currentNode.attributes.getNamedItem('co-tts.resume'));
71
- } else if (currentNode.attributes.getNamedItem('co-tts.pause')) {
72
- await this.handlePauseModifier(currentNode, currentNode.attributes.getNamedItem('co-tts.pause'));
73
- }
74
- }
75
-
76
- if (currentNode.childNodes.length > 0) {
77
- await this.registerBindings(currentNode);
78
- }
79
- }
80
- }
81
-
82
- async handleIdModifier(node: any, attr: Attr) {
83
- node.addEventListener('click', async (_: any) => {
84
- this.stopPlayer();
85
- await this.createInterval();
86
- const referenceDiv = document.getElementById(attr.value);
87
- this.clickedNode = referenceDiv;
88
-
89
- if (!referenceDiv) {
90
- return;
91
- }
92
-
93
- if (referenceDiv.hasAttribute('co-tts.text') && referenceDiv.getAttribute('co-tts.text') !== '') {
94
- this.textToRead = referenceDiv.getAttribute('co-tts.text') ?? '';
95
- } else {
96
- this.textToRead = referenceDiv.innerText;
97
- }
98
-
99
- if (referenceDiv.hasAttribute('co-tts.highlight')) {
100
- if (referenceDiv.attributes.getNamedItem('co-tts.highlight')?.value !== '') {
101
- const newReferenceDiv =
102
- document.getElementById(referenceDiv.attributes.getNamedItem('co-tts.highlight').value);
103
-
104
- this.highlightDiv = newReferenceDiv;
105
- this.originalHighlightDivInnerHTML = newReferenceDiv.innerHTML;
106
- } else {
107
- this.highlightDiv = referenceDiv;
108
- this.originalHighlightDivInnerHTML = referenceDiv.innerHTML;
109
- }
110
- }
111
-
112
- this.startSynthesizer(node, attr);
113
- });
114
- }
115
-
116
- async handleAjaxModifier(node: any, attr: Attr) {
117
- node.addEventListener('click', async (_: any) => {
118
- this.stopPlayer();
119
- await this.createInterval();
120
- this.clickedNode = node;
121
- const response = await fetch(attr.value, {
122
- method: `GET`,
123
- });
124
-
125
- this.textToRead = await response.text();
126
-
127
- this.startSynthesizer(node, attr);
128
- });
129
- }
130
-
131
- async handleDefault(node: any, attr: Attr) {
132
- node.addEventListener('click', async (_: any) => {
133
- this.stopPlayer();
134
- await this.createInterval();
135
- this.clickedNode = node;
136
- if (node.hasAttribute('co-tts.highlight')) {
137
- if (node.attributes.getNamedItem('co-tts.highlight')?.value !== '') {
138
- const newReferenceDiv = document.getElementById(node.attributes.getNamedItem('co-tts.highlight').value);
139
-
140
- this.highlightDiv = newReferenceDiv;
141
- this.originalHighlightDivInnerHTML = newReferenceDiv.innerHTML;
142
- } else {
143
- this.highlightDiv = node;
144
- this.originalHighlightDivInnerHTML = node.innerHTML;
145
- }
146
- }
147
- if (attr.value === '') {
148
- this.textToRead = node.innerText;
149
- } else {
150
- this.textToRead = attr.value;
151
- }
152
-
153
- this.startSynthesizer(node, attr);
154
- });
155
- }
156
-
157
- async handleWithoutClick(node: any, attr: Attr) {
158
- this.stopPlayer();
159
- await this.createInterval();
160
- this.clickedNode = node;
161
- if (node.hasAttribute('co-tts.highlight')) {
162
- if (node.attributes.getNamedItem('co-tts.highlight')?.value !== '') {
163
- const newReferenceDiv = document.getElementById(node.attributes.getNamedItem('co-tts.highlight').value);
164
-
165
- this.highlightDiv = newReferenceDiv;
166
- this.originalHighlightDivInnerHTML = newReferenceDiv.innerHTML;
167
- } else {
168
- this.highlightDiv = node;
169
- this.originalHighlightDivInnerHTML = node.innerHTML;
170
- }
171
- }
172
- if (attr.value === '') {
173
- this.textToRead = node.innerText;
174
- } else {
175
- this.textToRead = attr.value;
176
- }
177
-
178
- this.startSynthesizer(node, attr);
179
- }
180
-
181
- async handleStopModifier(node: any, attr: Attr) {
182
- node.addEventListener('click', async (_: any) => {
183
- await this.stopPlayer();
184
- document.dispatchEvent(new CustomEvent('COAzureTTSStoppedPlaying', {}));
185
- });
186
- }
187
-
188
- async handlePauseModifier(node: any, attr: Attr) {
189
- node.addEventListener('click', async (_: any) => {
190
- await this.clearInterval();
191
- await this.player.pause();
192
- document.dispatchEvent(new CustomEvent('COAzureTTSPausedPlaying', {}));
193
- });
194
- }
195
-
196
- async handleResumeModifier(node: any, attr: Attr) {
197
- node.addEventListener('click', async (_: any) => {
198
- await this.createInterval();
199
- await this.player.resume();
200
- document.dispatchEvent(new CustomEvent('COAzureTTSResumedPlaying', {}));
201
- });
202
- }
203
-
204
- async stopPlayer() {
205
- await this.clearInterval();
206
- if (this.highlightDiv !== undefined) {
207
- this.highlightDiv.innerHTML = this.originalHighlightDivInnerHTML;
208
- }
209
-
210
- this.textToRead = '';
211
- this.currentWord = '';
212
- this.originalHighlightDivInnerHTML = '';
213
- this.wordBoundryList = [];
214
- this.wordEncounters = [];
215
- if (this.player !== undefined) {
216
- this.player.pause();
217
- }
218
- this.player = undefined;
219
- this.highlightDiv = undefined;
220
- }
221
-
222
- async startSynthesizer(node: any, attr: Attr) {
223
- this.speechConfig = SpeechConfig.fromSubscription(this.key, this.region);
224
-
225
- this.speechConfig.speechSynthesisVoiceName = `Microsoft Server Speech Text to Speech Voice (${this.voice})`;
226
- this.speechConfig.speechSynthesisOutputFormat = SpeechSynthesisOutputFormat.Audio24Khz160KBitRateMonoMp3;
227
-
228
- this.player = new SpeakerAudioDestination();
229
-
230
- this.audioConfig = AudioConfig.fromSpeakerOutput(this.player);
231
- this.synthesizer = new SpeechSynthesizer(this.speechConfig, this.audioConfig);
232
-
233
- this.synthesizer.wordBoundary = (s: any, e: any) => {
234
- this.wordBoundryList.push(e);
235
- };
236
-
237
- this.player.onAudioEnd = async () => {
238
- this.stopPlayer();
239
-
240
- if (this.clickedNode.hasAttribute('co-tts.next')) {
241
- const nextNode = document.getElementById(this.clickedNode.getAttribute('co-tts.next'));
242
- if (nextNode && nextNode.attributes.getNamedItem('co-tts.text')) {
243
- this.handleWithoutClick(nextNode, nextNode.attributes.getNamedItem('co-tts.text'));
244
- } else if (nextNode) {
245
- nextNode.dispatchEvent(new Event('click'));
246
- }
247
- } else {
248
- document.dispatchEvent(new CustomEvent('COAzureTTSFinishedPlaying', {}));
249
- }
250
- };
251
-
252
- this.player.onAudioStart = async () => {
253
- document.dispatchEvent(new CustomEvent('COAzureTTSStartedPlaying', {}));
254
- };
255
-
256
- this.synthesizer.speakSsmlAsync(this.buildSSML(this.textToRead),
257
- () => {
258
- this.synthesizer.close();
259
- this.synthesizer = undefined;
260
- },
261
- () => {
262
- this.synthesizer.close();
263
- this.synthesizer = undefined;
264
- });
265
- }
266
-
267
- async clearInterval() {
268
- clearInterval(this.interval);
269
- }
270
-
271
- async createInterval() {
272
- this.interval = setInterval(() => {
273
- if (this.player !== undefined && this.highlightDiv) {
274
- const currentTime = this.player.currentTime;
275
- let wordBoundary;
276
- for (const e of this.wordBoundryList) {
277
- if (currentTime * 1000 > e.audioOffset / 10000) {
278
- wordBoundary = e;
279
- } else {
280
- break;
281
- }
282
- }
283
-
284
- if (wordBoundary !== undefined) {
285
- if (~['.', ',', '!', '?', '*', '(', ')', '&', '\\', '/', '^', '[', ']', '<', '>', ':']
286
- .indexOf(wordBoundary.text)) {
287
- wordBoundary = this.previousWordBoundary ?? undefined;
288
- }
289
-
290
- if (wordBoundary === undefined) {
291
- this.highlightDiv.innerHTML = this.originalHighlightDivInnerHTML;
292
- } else {
293
- if (!this.wordEncounters[wordBoundary.text]) {
294
- this.wordEncounters[wordBoundary.text] = 0;
295
- }
296
-
297
- if (this.currentWord !== wordBoundary.text || this.wordBoundaryOffset !== wordBoundary.textOffset) {
298
- this.currentOffset = this.getPosition(
299
- this.originalHighlightDivInnerHTML,
300
- wordBoundary.text,
301
- this.wordEncounters[wordBoundary.text]
302
- );
303
- this.wordEncounters[wordBoundary.text] = this.currentOffset + wordBoundary.wordLength;
304
- this.currentWord = wordBoundary.text;
305
- this.wordBoundaryOffset = wordBoundary.textOffset;
306
- }
307
-
308
- if (this.currentOffset <= -1) {
309
- this.highlightDiv.innerHTML = this.originalHighlightDivInnerHTML;
310
- } else {
311
- this.previousWordBoundary = wordBoundary;
312
- const startOfString = this.originalHighlightDivInnerHTML.substring(0, this.currentOffset);
313
- const endOffset = this.currentOffset + wordBoundary.wordLength;
314
- const endOfString = this.originalHighlightDivInnerHTML.substring(endOffset);
315
- this.highlightDiv.innerHTML = `
316
- ${startOfString}<mark class='co-tts-highlight'>${wordBoundary.text}</mark>${endOfString}
317
- `;
318
- }
319
- }
320
- } else {
321
- this.highlightDiv.innerHTML = this.originalHighlightDivInnerHTML;
322
- }
323
- }
324
- }, 50);
325
- }
326
-
327
- getPosition(string: string, subString: string, lastOffset: number) {
328
- const regex = new RegExp(`(?:^|[^-\\w])(${subString})\\b`, 'g');
329
- const offset = string.slice(lastOffset).search(regex);
330
- return (offset <= 0 ? offset : offset + 1) + lastOffset;
331
- }
332
-
333
- buildSSML(text: string) {
334
- return `<speak xmlns="http://www.w3.org/2001/10/synthesis"
335
- xmlns:mstts="http://www.w3.org/2001/mstts"
336
- xmlns:emo="http://www.w3.org/2009/10/emotionml"
337
- version="1.0"
338
- xml:lang="en-US">
339
- <voice name="${this.voice}">
340
- <prosody rate="${this.rate}%" pitch="${this.pitch}%">
341
- ${text}
342
- </prosody>
343
- </voice>
344
- </speak>`;
345
- }
346
- }
1
+ export * from './SpeechToText';
2
+ export * from './TextToSpeech';